diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2eb31bf0..06c0f71e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -74,7 +74,7 @@ repos: - id: black additional_dependencies: ['click==8.0.4'] - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.1.1 hooks: - id: flake8 - repo: local diff --git a/README.md b/README.md index 9d10000d..60cbfca0 100644 --- a/README.md +++ b/README.md @@ -1,437 +1,326 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](https://aihub.qualcomm.com) -# Qualcomm® AI Hub Models - -The [Qualcomm® AI Hub Models](https://aihub.qualcomm.com/) are a collection of -state-of-the-art machine learning models optimized for performance (latency, -memory etc.) and ready to deploy on Qualcomm® devices. - -* Explore models optimized for on-device deployment of vision, speech, text, and genenrative AI. -* View open-source recipes to quantize, optimize, and deploy these models on-device. -* Browse through [performance metrics](https://aihub.qualcomm.com/models) captured for these models on several devices. -* Access the models through [Hugging Face](https://huggingface.co/qualcomm). -* Check out [sample apps](https://github.com/quic/ai-hub-apps) for on-device deployment of AI Hub models. -* [Sign up](https://myaccount.qualcomm.com/signup) to run these models on hosted Qualcomm® devices. 
- -Supported **python package host machine** Operating Systems: -- Linux (x86, ARM) -- Windows (x86) -- Windows (ARM-- ONLY via x86 Python, not ARM Python) -- MacOS (x86, ARM) - -Supported runtimes -* [TensorFlow Lite](https://www.tensorflow.org/lite) -* [Qualcomm AI Engine Direct](https://www.qualcomm.com/developer/artificial-intelligence#overview) -* [ONNX](https://onnxruntime.ai/docs/execution-providers/QNN-ExecutionProvider.html) - -Models can be deployed on: -* Android -* Windows -* Linux - -Supported compute units -* CPU, GPU, NPU (includes [Hexagon DSP](https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor), [HTP](https://developer.qualcomm.com/hardware/qualcomm-innovators-development-kit/ai-resources-overview/ai-hardware-cores-accelerators)) - -Supported precision -* Floating Points: FP16 -* Integer: INT8 (8-bit weight and activation on select models), INT4 (4-bit weight, 16-bit activation on select models) - -Supported chipsets -* [Snapdragon 845](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-845-mobile-platform), [Snapdragon 855/855+](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-855-mobile-platform), [Snapdragon 865/865+](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-865-plus-5g-mobile-platform), [Snapdragon 888/888+](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-888-5g-mobile-platform) -* [Snapdragon 8 Elite](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-elite-mobile-platform), [Snapdragon 8 Gen 3](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-3-mobile-platform), [Snapdragon 8 Gen 
2](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-2-mobile-platform), [Snapdragon 8 Gen 1](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-1-mobile-platform) -* [Snapdragon X Elite](https://www.qualcomm.com/products/mobile/snapdragon/pcs-and-tablets/snapdragon-x-elite) - -Select supported devices -* Samsung Galaxy S21 Series, Galaxy S22 Series, Galaxy S23 Series, Galaxy S24 Series -* Xiaomi 12, 13 -* Google Pixel 3, 4, 5 -* Snapdragon X Elite CRD (Compute Reference Device) +# [Qualcomm® AI Hub Models](https://aihub.qualcomm.com/) -and many more. +[![Release](https://img.shields.io/github/v/release/quic/ai-hub-models)](https://github.com/quic/ai-hub-models/releases/latest) +[![Tag](https://img.shields.io/github/v/tag/quic/ai-hub-models)](https://github.com/quic/ai-hub-models/releases/latest) +[![PyPi](https://img.shields.io/pypi/v/qai-hub-models)](https://pypi.org/project/qai-hub-models/) +![Python 3.9, 3.10, 3.11, 3.12](https://img.shields.io/badge/python-3.9%2C%203.10%20(Recommended)%2C%203.11%2C%203.12-yellow) -## Installation +The Qualcomm® AI Hub Models are a collection of +state-of-the-art machine learning models optimized for deployment on Qualcomm® devices. -We currently support **Python 3.9, 3.10 (recommended), 3.11, and 3.12.** We recommend using a Python -virtual environment -([miniconda](https://docs.anaconda.com/free/miniconda/miniconda-install/) or -[virtualenv](https://virtualenv.pypa.io/en/latest/)). +* [List of Models by Category](#model-directory) +* [On-Device Performance Data](https://aihub.qualcomm.com/models) +* [Device-Native Sample Apps](https://github.com/quic/ai-hub-apps) -*NOTE: Many quantized models are supported only with python 3.10*. 
+See supported: [On-Device Runtimes](#on-device-runtimes), [Hardware Targets & Precision](#device-hardware--precision), [Chipsets](#chipsets), [Devices](#devices) -You can setup a virtualenv using: -``` -python -m venv qai_hub_models_env && source qai_hub_models_env/bin/activate -``` +  -Once the environment is setup, you can install the base package using: +## Setup -```shell -pip install qai_hub_models -``` +### 1. Install Python Package -Some models (e.g. [YOLOv7](https://github.com/WongKinYiu/yolov7)) require -additional dependencies. You can install those dependencies automatically -using: +The package is available via pip: ```shell -pip install "qai_hub_models[yolov7]" -``` - -## Getting Started +# NOTE for Snapdragon X Elite users: +# Only AMDx64 (64-bit) Python is supported on Windows. +# Installation will fail when using Windows ARM64 Python. -Each model comes with the following set of CLI demos: -* Locally runnable PyTorch based CLI demo to validate the model off device. -* On-device CLI demo that produces a model ready for on-device deployment and runs the model on a hosted Qualcomm® device (needs [sign up](https://myaccount.qualcomm.com/signup)). - -All the models produced by these demos are freely available on [Hugging -Face](https://huggingface.co/qualcomm) or through our -[website](https://aihub.qualcomm.com/models). See the individual model readme -files (e.g. [YOLOv7](qai_hub_models/models/yolov7/README.md)) for more -details. - -### Local CLI Demo with PyTorch +pip install qai_hub_models +``` -[All models](#model-directory) contain CLI demos that run the model in -**PyTorch** locally with sample input. Demos are optimized for code clarity -rather than latency, and run exclusively in PyTorch. Optimal model latency can -be achieved with model export via [Qualcomm® AI -Hub](https://www.aihub.qualcomm.com). +Some models (e.g. 
[YOLOv7](https://github.com/quic/ai-hub-models/tree/main/qai_hub_models/models/yolov7)) require +additional dependencies that can be installed as follows: ```shell -python -m qai_hub_models.models.yolov7.demo +pip install "qai_hub_models[yolov7]" ``` -For additional details on how to use the demo CLI, use the `--help` option -```shell -python -m qai_hub_models.models.yolov7.demo --help -``` +  -See the [model directory](#model-directory) below to explore all other models. +### 2. Configure AI Hub Access ---- +Many features of AI Hub Models _(such as model compilation, on-device profiling, etc.)_ require access to Qualcomm® AI Hub: -Note that most ML use cases require some pre and post-processing that are not -part of the model itself. A python reference implementation of this is provided -for each model in `app.py`. Apps load & pre-process model input, run model -inference, and post-process model output before returning it to you. +- [Create a Qualcomm® ID](https://myaccount.qualcomm.com/signup), and use it to [login to Qualcomm® AI Hub](https://app.aihub.qualcomm.com/). 
+- Configure your [API token](https://app.aihub.qualcomm.com/account/): `qai-hub configure --api_token API_TOKEN` -Here is an example of how the PyTorch CLI works for [YOLOv7](https://github.com/WongKinYiu/yolov7): +  -```python -from PIL import Image -from qai_hub_models.models.yolov7 import Model as YOLOv7Model -from qai_hub_models.models.yolov7 import App as YOLOv7App -from qai_hub_models.utils.asset_loaders import load_image -from qai_hub_models.models.yolov7.demo import IMAGE_ADDRESS +## Getting Started -# Load pre-trained model -torch_model = YOLOv7Model.from_pretrained() +### Export and Run A Model on a Physical Device -# Load a simple PyTorch based application -app = YOLOv7App(torch_model) -image = load_image(IMAGE_ADDRESS, "yolov7") +All [models in our directory](#model-directory) can be compiled and profiled on a hosted +Qualcomm® device: -# Perform prediction on a sample image -pred_image = app.predict(image)[0] -Image.fromarray(pred_image).show() +```shell +pip install "qai_hub_models[yolov7]" +python -m qai_hub_models.models.yolov7.export [--target-runtime ...] [--device ...] [--help] ``` -### CLI demo to run on hosted Qualcomm® devices +_Using Qualcomm® AI Hub_, the export script will: -[Some models](#model-directory) contain CLI demos that run the model on a hosted -Qualcomm® device using [Qualcomm® AI Hub](https://aihub.qualcomm.com). +1. **Compile** the model for the chosen device and target runtime (see: [Compiling Models on AI Hub](https://app.aihub.qualcomm.com/docs/hub/compile_examples.html)). +2. If applicable, **Quantize** the model (see: [Quantization on AI Hub](https://app.aihub.qualcomm.com/docs/hub/quantize_examples.html)) +3. **Profile** the compiled model on a real device in the cloud (see: [Profiling Models on AI Hub](https://app.aihub.qualcomm.com/docs/hub/profile_examples.html)). +4. 
**Run inference** with sample input data on a real device in the cloud, and compare on-device model output with PyTorch output (see: [Running Inference on AI Hub](https://app.aihub.qualcomm.com/docs/hub/inference_examples.html)) +5. **Download** the compiled model to disk. -To run the model on a hosted device, [sign up for access to Qualcomm® AI -Hub](https://myaccount.qualcomm.com/signup). Sign-in to Qualcomm® AI Hub with your -Qualcomm® ID. Once signed in navigate to Account -> Settings -> API Token. +  -With this API token, you can configure your client to run models on the cloud -hosted devices. +### End-To-End Model Demos -```shell -qai-hub configure --api_token API_TOKEN -``` -Navigate to [docs](https://app.aihub.qualcomm.com/docs/) for more information. - -The on-device CLI demo performs the following: -* Exports the model for on-device execution. -* Profiles the model on-device on a cloud hosted Qualcomm® device. -* Runs the model on-device on a cloud hosted Qualcomm® device and compares accuracy between a local CPU based PyTorch run and the on-device run. -* Downloads models (and other required assets) that can be deployed on-device in an Android application. +Most [models in our directory](#model-directory) contain CLI demos that run the model _end-to-end_: ```shell -python -m qai_hub_models.models.yolov7.export -``` - -Many models may have initialization parameters that allow loading custom -weights and checkpoints. See `--help` for more details - -```shell -python -m qai_hub_models.models.yolov7.export --help +pip install "qai_hub_models[yolov7]" +# Predict and draw bounding boxes on the provided image +python -m qai_hub_models.models.yolov7.demo [--image ...] [--on-device] [--help] ``` -#### How does this export script work? - -As described above, the script above compiles, optimizes, and runs the model on -a cloud hosted Qualcomm® device. The demo uses [Qualcomm® AI Hub's Python -APIs](https://app.aihub.qualcomm.com/docs/). 
- -Qualcomm® AI Hub explained +_End-to-end_ demos: +1. **Preprocess** human-readable input into model input +2. Run **model inference** +3. **Postprocess** model output to a human-readable format -Here is a simplified example of code that can be used to run the entire model -on a cloud hosted device: +**Many end-to-end demos use AI Hub to run inference on a real cloud-hosted device** _(if the `--on-device` flag is set)_. All end-to-end demos also run locally via PyTorch. -```python -import torch -import qai_hub as hub -from qai_hub_models.models.yolov7 import Model as YOLOv7Model +  -# Load YOLOv7 in PyTorch -torch_model = YOLOv7Model.from_pretrained() -torch_model.eval() +### Sample Applications -# Trace the PyTorch model using one data point of provided sample inputs to -# torch tensor to trace the model. -example_input = [torch.tensor(data[0]) for name, data in torch_model.sample_inputs().items()] -pt_model = torch.jit.trace(torch_model, example_input) +**Native** applications that can run our models (with pre- and post-processing) on physical devices are published in the [AI Hub Apps repository](https://github.com/quic/ai-hub-apps/). -# Select a device -device = hub.Device("Samsung Galaxy S23") +**Python** applications are defined for all models [(from qai_hub_models.models.\<model_name\> import App)](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/yolov7/app.py). These apps wrap model inference with pre- and post-processing steps written using torch & numpy. 
**These apps are optimized to be an easy-to-follow example, rather than to minimize prediction time.** -# Compile model for a specific device -compile_job = hub.submit_compile_job( - model=pt_model, - device=device, - input_specs=torch_model.get_input_spec(), -) +  -# Get target model to run on a cloud hosted device -target_model = compile_job.get_target_model() +## Model Support Data -# Profile the previously compiled model on a cloud hosted device -profile_job = hub.submit_profile_job( - model=target_model, - device=device, -) - -# Perform on-device inference on a cloud hosted device -input_data = torch_model.sample_inputs() -inference_job = hub.submit_inference_job( - model=target_model, - device=device, - inputs=input_data, -) - -# Returns the output as dict{name: numpy} -on_device_output = inference_job.download_output_data() -``` - ---- - -### Working with source code - -You can clone the repository using: - -```shell -git clone https://github.com/quic/ai-hub-models/blob/main -cd main -pip install -e . -``` +### On-Device Runtimes -Install additional dependencies to prepare a model before using the following: -```shell -cd main -pip install -e ".[yolov7]" -``` +| Runtime | Supported OS | +| -- | -- | +| [Qualcomm AI Engine Direct](https://www.qualcomm.com/developer/artificial-intelligence#overview) | Android, Linux, Windows +| [LiteRT (TensorFlow Lite)](https://www.tensorflow.org/lite) | Android, Linux +| [ONNX](https://onnxruntime.ai/docs/execution-providers/QNN-ExecutionProvider.html) | Android, Linux, Windows -All models have accuracy and end-to-end tests when applicable. These tests as -designed to be run locally and verify that the PyTorch code produces correct -results. To run the tests for a model: -```shell -python -m pytest --pyargs qai_hub_models.models.yolov7.test -``` ---- +### Device Hardware & Precision -For any issues, please contact us at ai-hub-support@qti.qualcomm.com. 
+| Device Compute Unit | Supported Precision | +| -- | -- | +| CPU | FP32, INT16, INT8 +| GPU | FP32, FP16 +| NPU (includes [Hexagon DSP](https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor), [HTP](https://developer.qualcomm.com/hardware/qualcomm-innovators-development-kit/ai-resources-overview/ai-hardware-cores-accelerators)) | FP16*, INT16, INT8 +*Some older chipsets do not support fp16 inference on their NPU. ---- +### Chipsets +* Snapdragon [8 Elite](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-elite-mobile-platform), [8 Gen 3](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-3-mobile-platform), [8 Gen 2](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-2-mobile-platform), and [8 Gen 1](https://www.qualcomm.com/products/mobile/snapdragon/smartphones/snapdragon-8-series-mobile-platforms/snapdragon-8-gen-1-mobile-platform) Mobile Platforms +* [Snapdragon X Elite](https://www.qualcomm.com/products/mobile/snapdragon/pcs-and-tablets/snapdragon-x-elite) Compute Platform +* SA8255P, SA8295P, SA8650P, and SA8775P Automotive Platforms +* [QCS 6490](https://www.qualcomm.com/products/internet-of-things/industrial/building-enterprise/qcs6490), [QCS 8250](https://www.qualcomm.com/products/internet-of-things/consumer/cameras/qcs8250), and [QCS 8550](https://www.qualcomm.com/products/technology/processors/qcs8550) IoT Platforms +* QCS8450 XR Platform -### LICENSE +and many more. -Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](../LICENSE). +### Devices +* Samsung Galaxy S21, S22, S23, and S24 Series +* Xiaomi 12 and 13 +* Snapdragon X Elite CRD (Compute Reference Device) +* Qualcomm RB3 Gen 2, RB5 +and many more. 
---- +  ## Model Directory ### Computer Vision -| Model | README | Torch App | Device Export | CLI Demo -| -- | -- | -- | -- | -- -| | | | | +| Model | README | +| -- | -- | +| | | | **Image Classification** -| [ConvNext-Tiny](https://aihub.qualcomm.com/models/convnext_tiny) | [qai_hub_models.models.convnext_tiny](qai_hub_models/models/convnext_tiny/README.md) | ✔️ | ✔️ | ✔️ -| [ConvNext-Tiny-w8a16-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a16_quantized) | [qai_hub_models.models.convnext_tiny_w8a16_quantized](qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ConvNext-Tiny-w8a8-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a8_quantized) | [qai_hub_models.models.convnext_tiny_w8a8_quantized](qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [DenseNet-121](https://aihub.qualcomm.com/models/densenet121) | [qai_hub_models.models.densenet121](qai_hub_models/models/densenet121/README.md) | ✔️ | ✔️ | ✔️ -| [DenseNet-121-Quantized](https://aihub.qualcomm.com/models/densenet121_quantized) | [qai_hub_models.models.densenet121_quantized](qai_hub_models/models/densenet121_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [EfficientNet-B0](https://aihub.qualcomm.com/models/efficientnet_b0) | [qai_hub_models.models.efficientnet_b0](qai_hub_models/models/efficientnet_b0/README.md) | ✔️ | ✔️ | ✔️ -| [GoogLeNet](https://aihub.qualcomm.com/models/googlenet) | [qai_hub_models.models.googlenet](qai_hub_models/models/googlenet/README.md) | ✔️ | ✔️ | ✔️ -| [GoogLeNetQuantized](https://aihub.qualcomm.com/models/googlenet_quantized) | [qai_hub_models.models.googlenet_quantized](qai_hub_models/models/googlenet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Inception-v3](https://aihub.qualcomm.com/models/inception_v3) | [qai_hub_models.models.inception_v3](qai_hub_models/models/inception_v3/README.md) | ✔️ | ✔️ | ✔️ -| [Inception-v3-Quantized](https://aihub.qualcomm.com/models/inception_v3_quantized) | 
[qai_hub_models.models.inception_v3_quantized](qai_hub_models/models/inception_v3_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MNASNet05](https://aihub.qualcomm.com/models/mnasnet05) | [qai_hub_models.models.mnasnet05](qai_hub_models/models/mnasnet05/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v2](https://aihub.qualcomm.com/models/mobilenet_v2) | [qai_hub_models.models.mobilenet_v2](qai_hub_models/models/mobilenet_v2/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v2-Quantized](https://aihub.qualcomm.com/models/mobilenet_v2_quantized) | [qai_hub_models.models.mobilenet_v2_quantized](qai_hub_models/models/mobilenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v3-Large](https://aihub.qualcomm.com/models/mobilenet_v3_large) | [qai_hub_models.models.mobilenet_v3_large](qai_hub_models/models/mobilenet_v3_large/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v3-Large-Quantized](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) | [qai_hub_models.models.mobilenet_v3_large_quantized](qai_hub_models/models/mobilenet_v3_large_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | ✔️ | ✔️ | ✔️ -| [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | ✔️ | ✔️ | ✔️ -| [RegNetQuantized](https://aihub.qualcomm.com/models/regnet_quantized) | [qai_hub_models.models.regnet_quantized](qai_hub_models/models/regnet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt50](https://aihub.qualcomm.com/models/resnext50) | 
[qai_hub_models.models.resnext50](qai_hub_models/models/resnext50/README.md) | ✔️ | ✔️ | ✔️ -| [ResNeXt50Quantized](https://aihub.qualcomm.com/models/resnext50_quantized) | [qai_hub_models.models.resnext50_quantized](qai_hub_models/models/resnext50_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet101](https://aihub.qualcomm.com/models/resnet101) | [qai_hub_models.models.resnet101](qai_hub_models/models/resnet101/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet101Quantized](https://aihub.qualcomm.com/models/resnet101_quantized) | [qai_hub_models.models.resnet101_quantized](qai_hub_models/models/resnet101_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet18](https://aihub.qualcomm.com/models/resnet18) | [qai_hub_models.models.resnet18](qai_hub_models/models/resnet18/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet50](https://aihub.qualcomm.com/models/resnet50) | [qai_hub_models.models.resnet50](qai_hub_models/models/resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [ResNet50Quantized](https://aihub.qualcomm.com/models/resnet50_quantized) | [qai_hub_models.models.resnet50_quantized](qai_hub_models/models/resnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Shufflenet-v2](https://aihub.qualcomm.com/models/shufflenet_v2) | [qai_hub_models.models.shufflenet_v2](qai_hub_models/models/shufflenet_v2/README.md) | ✔️ | ✔️ | ✔️ -| [Shufflenet-v2Quantized](https://aihub.qualcomm.com/models/shufflenet_v2_quantized) | [qai_hub_models.models.shufflenet_v2_quantized](qai_hub_models/models/shufflenet_v2_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [SqueezeNet-1_1](https://aihub.qualcomm.com/models/squeezenet1_1) | [qai_hub_models.models.squeezenet1_1](qai_hub_models/models/squeezenet1_1/README.md) | ✔️ | ✔️ | ✔️ -| [SqueezeNet-1_1Quantized](https://aihub.qualcomm.com/models/squeezenet1_1_quantized) | 
[qai_hub_models.models.squeezenet1_1_quantized](qai_hub_models/models/squeezenet1_1_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Swin-Base](https://aihub.qualcomm.com/models/swin_base) | [qai_hub_models.models.swin_base](qai_hub_models/models/swin_base/README.md) | ✔️ | ✔️ | ✔️ -| [Swin-Small](https://aihub.qualcomm.com/models/swin_small) | [qai_hub_models.models.swin_small](qai_hub_models/models/swin_small/README.md) | ✔️ | ✔️ | ✔️ -| [Swin-Tiny](https://aihub.qualcomm.com/models/swin_tiny) | [qai_hub_models.models.swin_tiny](qai_hub_models/models/swin_tiny/README.md) | ✔️ | ✔️ | ✔️ -| [VIT](https://aihub.qualcomm.com/models/vit) | [qai_hub_models.models.vit](qai_hub_models/models/vit/README.md) | ✔️ | ✔️ | ✔️ -| [VITQuantized](https://aihub.qualcomm.com/models/vit_quantized) | [qai_hub_models.models.vit_quantized](qai_hub_models/models/vit_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [WideResNet50](https://aihub.qualcomm.com/models/wideresnet50) | [qai_hub_models.models.wideresnet50](qai_hub_models/models/wideresnet50/README.md) | ✔️ | ✔️ | ✔️ -| [WideResNet50-Quantized](https://aihub.qualcomm.com/models/wideresnet50_quantized) | [qai_hub_models.models.wideresnet50_quantized](qai_hub_models/models/wideresnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [ConvNext-Tiny](https://aihub.qualcomm.com/models/convnext_tiny) | [qai_hub_models.models.convnext_tiny](qai_hub_models/models/convnext_tiny/README.md) | +| [ConvNext-Tiny-w8a16-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a16_quantized) | [qai_hub_models.models.convnext_tiny_w8a16_quantized](qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md) | +| [ConvNext-Tiny-w8a8-Quantized](https://aihub.qualcomm.com/models/convnext_tiny_w8a8_quantized) | [qai_hub_models.models.convnext_tiny_w8a8_quantized](qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md) | +| [DenseNet-121](https://aihub.qualcomm.com/models/densenet121) | 
[qai_hub_models.models.densenet121](qai_hub_models/models/densenet121/README.md) | +| [DenseNet-121-Quantized](https://aihub.qualcomm.com/models/densenet121_quantized) | [qai_hub_models.models.densenet121_quantized](qai_hub_models/models/densenet121_quantized/README.md) | +| [EfficientNet-B0](https://aihub.qualcomm.com/models/efficientnet_b0) | [qai_hub_models.models.efficientnet_b0](qai_hub_models/models/efficientnet_b0/README.md) | +| [EfficientNet-B4](https://aihub.qualcomm.com/models/efficientnet_b4) | [qai_hub_models.models.efficientnet_b4](qai_hub_models/models/efficientnet_b4/README.md) | +| [EfficientViT-b2-cls](https://aihub.qualcomm.com/models/efficientvit_b2_cls) | [qai_hub_models.models.efficientvit_b2_cls](qai_hub_models/models/efficientvit_b2_cls/README.md) | +| [EfficientViT-l2-cls](https://aihub.qualcomm.com/models/efficientvit_l2_cls) | [qai_hub_models.models.efficientvit_l2_cls](qai_hub_models/models/efficientvit_l2_cls/README.md) | +| [GoogLeNet](https://aihub.qualcomm.com/models/googlenet) | [qai_hub_models.models.googlenet](qai_hub_models/models/googlenet/README.md) | +| [GoogLeNetQuantized](https://aihub.qualcomm.com/models/googlenet_quantized) | [qai_hub_models.models.googlenet_quantized](qai_hub_models/models/googlenet_quantized/README.md) | +| [Inception-v3](https://aihub.qualcomm.com/models/inception_v3) | [qai_hub_models.models.inception_v3](qai_hub_models/models/inception_v3/README.md) | +| [Inception-v3-Quantized](https://aihub.qualcomm.com/models/inception_v3_quantized) | [qai_hub_models.models.inception_v3_quantized](qai_hub_models/models/inception_v3_quantized/README.md) | +| [MNASNet05](https://aihub.qualcomm.com/models/mnasnet05) | [qai_hub_models.models.mnasnet05](qai_hub_models/models/mnasnet05/README.md) | +| [MobileNet-v2](https://aihub.qualcomm.com/models/mobilenet_v2) | [qai_hub_models.models.mobilenet_v2](qai_hub_models/models/mobilenet_v2/README.md) | +| 
[MobileNet-v2-Quantized](https://aihub.qualcomm.com/models/mobilenet_v2_quantized) | [qai_hub_models.models.mobilenet_v2_quantized](qai_hub_models/models/mobilenet_v2_quantized/README.md) | +| [MobileNet-v3-Large](https://aihub.qualcomm.com/models/mobilenet_v3_large) | [qai_hub_models.models.mobilenet_v3_large](qai_hub_models/models/mobilenet_v3_large/README.md) | +| [MobileNet-v3-Large-Quantized](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized) | [qai_hub_models.models.mobilenet_v3_large_quantized](qai_hub_models/models/mobilenet_v3_large_quantized/README.md) | +| [MobileNet-v3-Small](https://aihub.qualcomm.com/models/mobilenet_v3_small) | [qai_hub_models.models.mobilenet_v3_small](qai_hub_models/models/mobilenet_v3_small/README.md) | +| [RegNet](https://aihub.qualcomm.com/models/regnet) | [qai_hub_models.models.regnet](qai_hub_models/models/regnet/README.md) | +| [RegNetQuantized](https://aihub.qualcomm.com/models/regnet_quantized) | [qai_hub_models.models.regnet_quantized](qai_hub_models/models/regnet_quantized/README.md) | +| [ResNeXt101](https://aihub.qualcomm.com/models/resnext101) | [qai_hub_models.models.resnext101](qai_hub_models/models/resnext101/README.md) | +| [ResNeXt101Quantized](https://aihub.qualcomm.com/models/resnext101_quantized) | [qai_hub_models.models.resnext101_quantized](qai_hub_models/models/resnext101_quantized/README.md) | +| [ResNeXt50](https://aihub.qualcomm.com/models/resnext50) | [qai_hub_models.models.resnext50](qai_hub_models/models/resnext50/README.md) | +| [ResNeXt50Quantized](https://aihub.qualcomm.com/models/resnext50_quantized) | [qai_hub_models.models.resnext50_quantized](qai_hub_models/models/resnext50_quantized/README.md) | +| [ResNet101](https://aihub.qualcomm.com/models/resnet101) | [qai_hub_models.models.resnet101](qai_hub_models/models/resnet101/README.md) | +| [ResNet101Quantized](https://aihub.qualcomm.com/models/resnet101_quantized) | 
[qai_hub_models.models.resnet101_quantized](qai_hub_models/models/resnet101_quantized/README.md) | +| [ResNet18](https://aihub.qualcomm.com/models/resnet18) | [qai_hub_models.models.resnet18](qai_hub_models/models/resnet18/README.md) | +| [ResNet18Quantized](https://aihub.qualcomm.com/models/resnet18_quantized) | [qai_hub_models.models.resnet18_quantized](qai_hub_models/models/resnet18_quantized/README.md) | +| [ResNet50](https://aihub.qualcomm.com/models/resnet50) | [qai_hub_models.models.resnet50](qai_hub_models/models/resnet50/README.md) | +| [ResNet50Quantized](https://aihub.qualcomm.com/models/resnet50_quantized) | [qai_hub_models.models.resnet50_quantized](qai_hub_models/models/resnet50_quantized/README.md) | +| [Shufflenet-v2](https://aihub.qualcomm.com/models/shufflenet_v2) | [qai_hub_models.models.shufflenet_v2](qai_hub_models/models/shufflenet_v2/README.md) | +| [Shufflenet-v2Quantized](https://aihub.qualcomm.com/models/shufflenet_v2_quantized) | [qai_hub_models.models.shufflenet_v2_quantized](qai_hub_models/models/shufflenet_v2_quantized/README.md) | +| [SqueezeNet-1_1](https://aihub.qualcomm.com/models/squeezenet1_1) | [qai_hub_models.models.squeezenet1_1](qai_hub_models/models/squeezenet1_1/README.md) | +| [SqueezeNet-1_1Quantized](https://aihub.qualcomm.com/models/squeezenet1_1_quantized) | [qai_hub_models.models.squeezenet1_1_quantized](qai_hub_models/models/squeezenet1_1_quantized/README.md) | +| [Swin-Base](https://aihub.qualcomm.com/models/swin_base) | [qai_hub_models.models.swin_base](qai_hub_models/models/swin_base/README.md) | +| [Swin-Small](https://aihub.qualcomm.com/models/swin_small) | [qai_hub_models.models.swin_small](qai_hub_models/models/swin_small/README.md) | +| [Swin-Tiny](https://aihub.qualcomm.com/models/swin_tiny) | [qai_hub_models.models.swin_tiny](qai_hub_models/models/swin_tiny/README.md) | +| [VIT](https://aihub.qualcomm.com/models/vit) | [qai_hub_models.models.vit](qai_hub_models/models/vit/README.md) | +| 
[VITQuantized](https://aihub.qualcomm.com/models/vit_quantized) | [qai_hub_models.models.vit_quantized](qai_hub_models/models/vit_quantized/README.md) | +| [WideResNet50](https://aihub.qualcomm.com/models/wideresnet50) | [qai_hub_models.models.wideresnet50](qai_hub_models/models/wideresnet50/README.md) | +| [WideResNet50-Quantized](https://aihub.qualcomm.com/models/wideresnet50_quantized) | [qai_hub_models.models.wideresnet50_quantized](qai_hub_models/models/wideresnet50_quantized/README.md) | +| | | | **Image Editing** -| [AOT-GAN](https://aihub.qualcomm.com/models/aotgan) | [qai_hub_models.models.aotgan](qai_hub_models/models/aotgan/README.md) | ✔️ | ✔️ | ✔️ -| [LaMa-Dilated](https://aihub.qualcomm.com/models/lama_dilated) | [qai_hub_models.models.lama_dilated](qai_hub_models/models/lama_dilated/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [AOT-GAN](https://aihub.qualcomm.com/models/aotgan) | [qai_hub_models.models.aotgan](qai_hub_models/models/aotgan/README.md) | +| [LaMa-Dilated](https://aihub.qualcomm.com/models/lama_dilated) | [qai_hub_models.models.lama_dilated](qai_hub_models/models/lama_dilated/README.md) | +| | | | **Super Resolution** -| [ESRGAN](https://aihub.qualcomm.com/models/esrgan) | [qai_hub_models.models.esrgan](qai_hub_models/models/esrgan/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetLarge](https://aihub.qualcomm.com/models/quicksrnetlarge) | [qai_hub_models.models.quicksrnetlarge](qai_hub_models/models/quicksrnetlarge/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetLarge-Quantized](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized) | [qai_hub_models.models.quicksrnetlarge_quantized](qai_hub_models/models/quicksrnetlarge_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetMedium](https://aihub.qualcomm.com/models/quicksrnetmedium) | [qai_hub_models.models.quicksrnetmedium](qai_hub_models/models/quicksrnetmedium/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetMedium-Quantized](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized) | 
[qai_hub_models.models.quicksrnetmedium_quantized](qai_hub_models/models/quicksrnetmedium_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetSmall](https://aihub.qualcomm.com/models/quicksrnetsmall) | [qai_hub_models.models.quicksrnetsmall](qai_hub_models/models/quicksrnetsmall/README.md) | ✔️ | ✔️ | ✔️ -| [QuickSRNetSmall-Quantized](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized) | [qai_hub_models.models.quicksrnetsmall_quantized](qai_hub_models/models/quicksrnetsmall_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Real-ESRGAN-General-x4v3](https://aihub.qualcomm.com/models/real_esrgan_general_x4v3) | [qai_hub_models.models.real_esrgan_general_x4v3](qai_hub_models/models/real_esrgan_general_x4v3/README.md) | ✔️ | ✔️ | ✔️ -| [Real-ESRGAN-x4plus](https://aihub.qualcomm.com/models/real_esrgan_x4plus) | [qai_hub_models.models.real_esrgan_x4plus](qai_hub_models/models/real_esrgan_x4plus/README.md) | ✔️ | ✔️ | ✔️ -| [SESR-M5](https://aihub.qualcomm.com/models/sesr_m5) | [qai_hub_models.models.sesr_m5](qai_hub_models/models/sesr_m5/README.md) | ✔️ | ✔️ | ✔️ -| [SESR-M5-Quantized](https://aihub.qualcomm.com/models/sesr_m5_quantized) | [qai_hub_models.models.sesr_m5_quantized](qai_hub_models/models/sesr_m5_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [XLSR](https://aihub.qualcomm.com/models/xlsr) | [qai_hub_models.models.xlsr](qai_hub_models/models/xlsr/README.md) | ✔️ | ✔️ | ✔️ -| [XLSR-Quantized](https://aihub.qualcomm.com/models/xlsr_quantized) | [qai_hub_models.models.xlsr_quantized](qai_hub_models/models/xlsr_quantized/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [ESRGAN](https://aihub.qualcomm.com/models/esrgan) | [qai_hub_models.models.esrgan](qai_hub_models/models/esrgan/README.md) | +| [QuickSRNetLarge](https://aihub.qualcomm.com/models/quicksrnetlarge) | [qai_hub_models.models.quicksrnetlarge](qai_hub_models/models/quicksrnetlarge/README.md) | +| [QuickSRNetLarge-Quantized](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized) | 
[qai_hub_models.models.quicksrnetlarge_quantized](qai_hub_models/models/quicksrnetlarge_quantized/README.md) | +| [QuickSRNetMedium](https://aihub.qualcomm.com/models/quicksrnetmedium) | [qai_hub_models.models.quicksrnetmedium](qai_hub_models/models/quicksrnetmedium/README.md) | +| [QuickSRNetMedium-Quantized](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized) | [qai_hub_models.models.quicksrnetmedium_quantized](qai_hub_models/models/quicksrnetmedium_quantized/README.md) | +| [QuickSRNetSmall](https://aihub.qualcomm.com/models/quicksrnetsmall) | [qai_hub_models.models.quicksrnetsmall](qai_hub_models/models/quicksrnetsmall/README.md) | +| [QuickSRNetSmall-Quantized](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized) | [qai_hub_models.models.quicksrnetsmall_quantized](qai_hub_models/models/quicksrnetsmall_quantized/README.md) | +| [Real-ESRGAN-General-x4v3](https://aihub.qualcomm.com/models/real_esrgan_general_x4v3) | [qai_hub_models.models.real_esrgan_general_x4v3](qai_hub_models/models/real_esrgan_general_x4v3/README.md) | +| [Real-ESRGAN-x4plus](https://aihub.qualcomm.com/models/real_esrgan_x4plus) | [qai_hub_models.models.real_esrgan_x4plus](qai_hub_models/models/real_esrgan_x4plus/README.md) | +| [SESR-M5](https://aihub.qualcomm.com/models/sesr_m5) | [qai_hub_models.models.sesr_m5](qai_hub_models/models/sesr_m5/README.md) | +| [SESR-M5-Quantized](https://aihub.qualcomm.com/models/sesr_m5_quantized) | [qai_hub_models.models.sesr_m5_quantized](qai_hub_models/models/sesr_m5_quantized/README.md) | +| [XLSR](https://aihub.qualcomm.com/models/xlsr) | [qai_hub_models.models.xlsr](qai_hub_models/models/xlsr/README.md) | +| [XLSR-Quantized](https://aihub.qualcomm.com/models/xlsr_quantized) | [qai_hub_models.models.xlsr_quantized](qai_hub_models/models/xlsr_quantized/README.md) | +| | | | **Semantic Segmentation** -| [DDRNet23-Slim](https://aihub.qualcomm.com/models/ddrnet23_slim) | 
[qai_hub_models.models.ddrnet23_slim](qai_hub_models/models/ddrnet23_slim/README.md) | ✔️ | ✔️ | ✔️ -| [DeepLabV3-Plus-MobileNet](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet) | [qai_hub_models.models.deeplabv3_plus_mobilenet](qai_hub_models/models/deeplabv3_plus_mobilenet/README.md) | ✔️ | ✔️ | ✔️ -| [DeepLabV3-Plus-MobileNet-Quantized](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet_quantized) | [qai_hub_models.models.deeplabv3_plus_mobilenet_quantized](qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [DeepLabV3-ResNet50](https://aihub.qualcomm.com/models/deeplabv3_resnet50) | [qai_hub_models.models.deeplabv3_resnet50](qai_hub_models/models/deeplabv3_resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [FCN-ResNet50](https://aihub.qualcomm.com/models/fcn_resnet50) | [qai_hub_models.models.fcn_resnet50](qai_hub_models/models/fcn_resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [FCN-ResNet50-Quantized](https://aihub.qualcomm.com/models/fcn_resnet50_quantized) | [qai_hub_models.models.fcn_resnet50_quantized](qai_hub_models/models/fcn_resnet50_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-122NS-LowRes](https://aihub.qualcomm.com/models/ffnet_122ns_lowres) | [qai_hub_models.models.ffnet_122ns_lowres](qai_hub_models/models/ffnet_122ns_lowres/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-40S](https://aihub.qualcomm.com/models/ffnet_40s) | [qai_hub_models.models.ffnet_40s](qai_hub_models/models/ffnet_40s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-40S-Quantized](https://aihub.qualcomm.com/models/ffnet_40s_quantized) | [qai_hub_models.models.ffnet_40s_quantized](qai_hub_models/models/ffnet_40s_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-54S](https://aihub.qualcomm.com/models/ffnet_54s) | [qai_hub_models.models.ffnet_54s](qai_hub_models/models/ffnet_54s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-54S-Quantized](https://aihub.qualcomm.com/models/ffnet_54s_quantized) | 
[qai_hub_models.models.ffnet_54s_quantized](qai_hub_models/models/ffnet_54s_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-78S](https://aihub.qualcomm.com/models/ffnet_78s) | [qai_hub_models.models.ffnet_78s](qai_hub_models/models/ffnet_78s/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-78S-LowRes](https://aihub.qualcomm.com/models/ffnet_78s_lowres) | [qai_hub_models.models.ffnet_78s_lowres](qai_hub_models/models/ffnet_78s_lowres/README.md) | ✔️ | ✔️ | ✔️ -| [FFNet-78S-Quantized](https://aihub.qualcomm.com/models/ffnet_78s_quantized) | [qai_hub_models.models.ffnet_78s_quantized](qai_hub_models/models/ffnet_78s_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [FastSam-S](https://aihub.qualcomm.com/models/fastsam_s) | [qai_hub_models.models.fastsam_s](qai_hub_models/models/fastsam_s/README.md) | ✔️ | ✔️ | ✔️ -| [FastSam-X](https://aihub.qualcomm.com/models/fastsam_x) | [qai_hub_models.models.fastsam_x](qai_hub_models/models/fastsam_x/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Selfie-Segmentation](https://aihub.qualcomm.com/models/mediapipe_selfie) | [qai_hub_models.models.mediapipe_selfie](qai_hub_models/models/mediapipe_selfie/README.md) | ✔️ | ✔️ | ✔️ -| [SINet](https://aihub.qualcomm.com/models/sinet) | [qai_hub_models.models.sinet](qai_hub_models/models/sinet/README.md) | ✔️ | ✔️ | ✔️ -| [Segment-Anything-Model](https://aihub.qualcomm.com/models/sam) | [qai_hub_models.models.sam](qai_hub_models/models/sam/README.md) | ✔️ | ✔️ | ✔️ -| [Unet-Segmentation](https://aihub.qualcomm.com/models/unet_segmentation) | [qai_hub_models.models.unet_segmentation](qai_hub_models/models/unet_segmentation/README.md) | ✔️ | ✔️ | ✔️ -| [YOLOv8-Segmentation](https://aihub.qualcomm.com/models/yolov8_seg) | [qai_hub_models.models.yolov8_seg](qai_hub_models/models/yolov8_seg/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [DDRNet23-Slim](https://aihub.qualcomm.com/models/ddrnet23_slim) | [qai_hub_models.models.ddrnet23_slim](qai_hub_models/models/ddrnet23_slim/README.md) | +| 
[DeepLabV3-Plus-MobileNet](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet) | [qai_hub_models.models.deeplabv3_plus_mobilenet](qai_hub_models/models/deeplabv3_plus_mobilenet/README.md) | +| [DeepLabV3-Plus-MobileNet-Quantized](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet_quantized) | [qai_hub_models.models.deeplabv3_plus_mobilenet_quantized](qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md) | +| [DeepLabV3-ResNet50](https://aihub.qualcomm.com/models/deeplabv3_resnet50) | [qai_hub_models.models.deeplabv3_resnet50](qai_hub_models/models/deeplabv3_resnet50/README.md) | +| [FCN-ResNet50](https://aihub.qualcomm.com/models/fcn_resnet50) | [qai_hub_models.models.fcn_resnet50](qai_hub_models/models/fcn_resnet50/README.md) | +| [FCN-ResNet50-Quantized](https://aihub.qualcomm.com/models/fcn_resnet50_quantized) | [qai_hub_models.models.fcn_resnet50_quantized](qai_hub_models/models/fcn_resnet50_quantized/README.md) | +| [FFNet-122NS-LowRes](https://aihub.qualcomm.com/models/ffnet_122ns_lowres) | [qai_hub_models.models.ffnet_122ns_lowres](qai_hub_models/models/ffnet_122ns_lowres/README.md) | +| [FFNet-40S](https://aihub.qualcomm.com/models/ffnet_40s) | [qai_hub_models.models.ffnet_40s](qai_hub_models/models/ffnet_40s/README.md) | +| [FFNet-40S-Quantized](https://aihub.qualcomm.com/models/ffnet_40s_quantized) | [qai_hub_models.models.ffnet_40s_quantized](qai_hub_models/models/ffnet_40s_quantized/README.md) | +| [FFNet-54S](https://aihub.qualcomm.com/models/ffnet_54s) | [qai_hub_models.models.ffnet_54s](qai_hub_models/models/ffnet_54s/README.md) | +| [FFNet-54S-Quantized](https://aihub.qualcomm.com/models/ffnet_54s_quantized) | [qai_hub_models.models.ffnet_54s_quantized](qai_hub_models/models/ffnet_54s_quantized/README.md) | +| [FFNet-78S](https://aihub.qualcomm.com/models/ffnet_78s) | [qai_hub_models.models.ffnet_78s](qai_hub_models/models/ffnet_78s/README.md) | +| [FFNet-78S-LowRes](https://aihub.qualcomm.com/models/ffnet_78s_lowres) | 
[qai_hub_models.models.ffnet_78s_lowres](qai_hub_models/models/ffnet_78s_lowres/README.md) | +| [FFNet-78S-Quantized](https://aihub.qualcomm.com/models/ffnet_78s_quantized) | [qai_hub_models.models.ffnet_78s_quantized](qai_hub_models/models/ffnet_78s_quantized/README.md) | +| [FastSam-S](https://aihub.qualcomm.com/models/fastsam_s) | [qai_hub_models.models.fastsam_s](qai_hub_models/models/fastsam_s/README.md) | +| [FastSam-X](https://aihub.qualcomm.com/models/fastsam_x) | [qai_hub_models.models.fastsam_x](qai_hub_models/models/fastsam_x/README.md) | +| [MediaPipe-Selfie-Segmentation](https://aihub.qualcomm.com/models/mediapipe_selfie) | [qai_hub_models.models.mediapipe_selfie](qai_hub_models/models/mediapipe_selfie/README.md) | +| [SINet](https://aihub.qualcomm.com/models/sinet) | [qai_hub_models.models.sinet](qai_hub_models/models/sinet/README.md) | +| [Segment-Anything-Model](https://aihub.qualcomm.com/models/sam) | [qai_hub_models.models.sam](qai_hub_models/models/sam/README.md) | +| [Unet-Segmentation](https://aihub.qualcomm.com/models/unet_segmentation) | [qai_hub_models.models.unet_segmentation](qai_hub_models/models/unet_segmentation/README.md) | +| [YOLOv8-Segmentation](https://aihub.qualcomm.com/models/yolov8_seg) | [qai_hub_models.models.yolov8_seg](qai_hub_models/models/yolov8_seg/README.md) | +| | | | **Object Detection** -| [DETR-ResNet101](https://aihub.qualcomm.com/models/detr_resnet101) | [qai_hub_models.models.detr_resnet101](qai_hub_models/models/detr_resnet101/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet101-DC5](https://aihub.qualcomm.com/models/detr_resnet101_dc5) | [qai_hub_models.models.detr_resnet101_dc5](qai_hub_models/models/detr_resnet101_dc5/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet50](https://aihub.qualcomm.com/models/detr_resnet50) | [qai_hub_models.models.detr_resnet50](qai_hub_models/models/detr_resnet50/README.md) | ✔️ | ✔️ | ✔️ -| [DETR-ResNet50-DC5](https://aihub.qualcomm.com/models/detr_resnet50_dc5) | 
[qai_hub_models.models.detr_resnet50_dc5](qai_hub_models/models/detr_resnet50_dc5/README.md) | ✔️ | ✔️ | ✔️ -| [FaceAttribNet](https://aihub.qualcomm.com/models/face_attrib_net) | [qai_hub_models.models.face_attrib_net](qai_hub_models/models/face_attrib_net/README.md) | ✔️ | ✔️ | ✔️ -| [FootTrackNet_Quantized](https://aihub.qualcomm.com/models/foot_track_net_quantized) | [qai_hub_models.models.foot_track_net_quantized](qai_hub_models/models/foot_track_net_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Lightweight-Face-Detection](https://aihub.qualcomm.com/models/face_det_lite) | [qai_hub_models.models.face_det_lite](qai_hub_models/models/face_det_lite/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Face-Detection](https://aihub.qualcomm.com/models/mediapipe_face) | [qai_hub_models.models.mediapipe_face](qai_hub_models/models/mediapipe_face/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Face-Detection-Quantized](https://aihub.qualcomm.com/models/mediapipe_face_quantized) | [qai_hub_models.models.mediapipe_face_quantized](qai_hub_models/models/mediapipe_face_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Hand-Detection](https://aihub.qualcomm.com/models/mediapipe_hand) | [qai_hub_models.models.mediapipe_hand](qai_hub_models/models/mediapipe_hand/README.md) | ✔️ | ✔️ | ✔️ -| [PPE-Detection](https://aihub.qualcomm.com/models/gear_guard_net) | [qai_hub_models.models.gear_guard_net](qai_hub_models/models/gear_guard_net/README.md) | ✔️ | ✔️ | ✔️ -| [PPE-Detection-Quantized](https://aihub.qualcomm.com/models/gear_guard_net_quantized) | [qai_hub_models.models.gear_guard_net_quantized](qai_hub_models/models/gear_guard_net_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Person-Foot-Detection](https://aihub.qualcomm.com/models/foot_track_net) | [qai_hub_models.models.foot_track_net](qai_hub_models/models/foot_track_net/README.md) | ✔️ | ✔️ | ✔️ -| [YOLOv11-Detection](https://aihub.qualcomm.com/models/yolov11_det) | [qai_hub_models.models.yolov11_det](qai_hub_models/models/yolov11_det/README.md) | ✔️ | 
✔️ | ✔️ -| [YOLOv8-Detection](https://aihub.qualcomm.com/models/yolov8_det) | [qai_hub_models.models.yolov8_det](qai_hub_models/models/yolov8_det/README.md) | ✔️ | ✔️ | ✔️ -| [YOLOv8-Detection-Quantized](https://aihub.qualcomm.com/models/yolov8_det_quantized) | [qai_hub_models.models.yolov8_det_quantized](qai_hub_models/models/yolov8_det_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-NAS](https://aihub.qualcomm.com/models/yolonas) | [qai_hub_models.models.yolonas](qai_hub_models/models/yolonas/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-NAS-Quantized](https://aihub.qualcomm.com/models/yolonas_quantized) | [qai_hub_models.models.yolonas_quantized](qai_hub_models/models/yolonas_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-v6](https://aihub.qualcomm.com/models/yolov6) | [qai_hub_models.models.yolov6](qai_hub_models/models/yolov6/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-v7](https://aihub.qualcomm.com/models/yolov7) | [qai_hub_models.models.yolov7](qai_hub_models/models/yolov7/README.md) | ✔️ | ✔️ | ✔️ -| [Yolo-v7-Quantized](https://aihub.qualcomm.com/models/yolov7_quantized) | [qai_hub_models.models.yolov7_quantized](qai_hub_models/models/yolov7_quantized/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [DETR-ResNet101](https://aihub.qualcomm.com/models/detr_resnet101) | [qai_hub_models.models.detr_resnet101](qai_hub_models/models/detr_resnet101/README.md) | +| [DETR-ResNet101-DC5](https://aihub.qualcomm.com/models/detr_resnet101_dc5) | [qai_hub_models.models.detr_resnet101_dc5](qai_hub_models/models/detr_resnet101_dc5/README.md) | +| [DETR-ResNet50](https://aihub.qualcomm.com/models/detr_resnet50) | [qai_hub_models.models.detr_resnet50](qai_hub_models/models/detr_resnet50/README.md) | +| [DETR-ResNet50-DC5](https://aihub.qualcomm.com/models/detr_resnet50_dc5) | [qai_hub_models.models.detr_resnet50_dc5](qai_hub_models/models/detr_resnet50_dc5/README.md) | +| [FaceAttribNet](https://aihub.qualcomm.com/models/face_attrib_net) | 
[qai_hub_models.models.face_attrib_net](qai_hub_models/models/face_attrib_net/README.md) | +| [Lightweight-Face-Detection](https://aihub.qualcomm.com/models/face_det_lite) | [qai_hub_models.models.face_det_lite](qai_hub_models/models/face_det_lite/README.md) | +| [MediaPipe-Face-Detection](https://aihub.qualcomm.com/models/mediapipe_face) | [qai_hub_models.models.mediapipe_face](qai_hub_models/models/mediapipe_face/README.md) | +| [MediaPipe-Face-Detection-Quantized](https://aihub.qualcomm.com/models/mediapipe_face_quantized) | [qai_hub_models.models.mediapipe_face_quantized](qai_hub_models/models/mediapipe_face_quantized/README.md) | +| [MediaPipe-Hand-Detection](https://aihub.qualcomm.com/models/mediapipe_hand) | [qai_hub_models.models.mediapipe_hand](qai_hub_models/models/mediapipe_hand/README.md) | +| [PPE-Detection](https://aihub.qualcomm.com/models/gear_guard_net) | [qai_hub_models.models.gear_guard_net](qai_hub_models/models/gear_guard_net/README.md) | +| [PPE-Detection-Quantized](https://aihub.qualcomm.com/models/gear_guard_net_quantized) | [qai_hub_models.models.gear_guard_net_quantized](qai_hub_models/models/gear_guard_net_quantized/README.md) | +| [Person-Foot-Detection](https://aihub.qualcomm.com/models/foot_track_net) | [qai_hub_models.models.foot_track_net](qai_hub_models/models/foot_track_net/README.md) | +| [Person-Foot-Detection-Quantized](https://aihub.qualcomm.com/models/foot_track_net_quantized) | [qai_hub_models.models.foot_track_net_quantized](qai_hub_models/models/foot_track_net_quantized/README.md) | +| [YOLOv11-Detection](https://aihub.qualcomm.com/models/yolov11_det) | [qai_hub_models.models.yolov11_det](qai_hub_models/models/yolov11_det/README.md) | +| [YOLOv8-Detection](https://aihub.qualcomm.com/models/yolov8_det) | [qai_hub_models.models.yolov8_det](qai_hub_models/models/yolov8_det/README.md) | +| [YOLOv8-Detection-Quantized](https://aihub.qualcomm.com/models/yolov8_det_quantized) | 
[qai_hub_models.models.yolov8_det_quantized](qai_hub_models/models/yolov8_det_quantized/README.md) | +| [Yolo-NAS](https://aihub.qualcomm.com/models/yolonas) | [qai_hub_models.models.yolonas](qai_hub_models/models/yolonas/README.md) | +| [Yolo-NAS-Quantized](https://aihub.qualcomm.com/models/yolonas_quantized) | [qai_hub_models.models.yolonas_quantized](qai_hub_models/models/yolonas_quantized/README.md) | +| [Yolo-v6](https://aihub.qualcomm.com/models/yolov6) | [qai_hub_models.models.yolov6](qai_hub_models/models/yolov6/README.md) | +| [Yolo-v7](https://aihub.qualcomm.com/models/yolov7) | [qai_hub_models.models.yolov7](qai_hub_models/models/yolov7/README.md) | +| [Yolo-v7-Quantized](https://aihub.qualcomm.com/models/yolov7_quantized) | [qai_hub_models.models.yolov7_quantized](qai_hub_models/models/yolov7_quantized/README.md) | +| | | | **Pose Estimation** -| [Facial-Landmark-Detection](https://aihub.qualcomm.com/models/facemap_3dmm) | [qai_hub_models.models.facemap_3dmm](qai_hub_models/models/facemap_3dmm/README.md) | ✔️ | ✔️ | ✔️ -| [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | ✔️ | ✔️ | ✔️ -| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | ✔️ | ✔️ | ✔️ -| [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | ✔️ | ✔️ | ✔️ -| [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | ✔️ | ✔️ | ✔️ -| [Posenet-Mobilenet](https://aihub.qualcomm.com/models/posenet_mobilenet) | 
[qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | ✔️ | ✔️ | ✔️ -| [Posenet-Mobilenet-Quantized](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) | [qai_hub_models.models.posenet_mobilenet_quantized](qai_hub_models/models/posenet_mobilenet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [Facial-Landmark-Detection](https://aihub.qualcomm.com/models/facemap_3dmm) | [qai_hub_models.models.facemap_3dmm](qai_hub_models/models/facemap_3dmm/README.md) | +| [HRNetPose](https://aihub.qualcomm.com/models/hrnet_pose) | [qai_hub_models.models.hrnet_pose](qai_hub_models/models/hrnet_pose/README.md) | +| [HRNetPoseQuantized](https://aihub.qualcomm.com/models/hrnet_pose_quantized) | [qai_hub_models.models.hrnet_pose_quantized](qai_hub_models/models/hrnet_pose_quantized/README.md) | +| [LiteHRNet](https://aihub.qualcomm.com/models/litehrnet) | [qai_hub_models.models.litehrnet](qai_hub_models/models/litehrnet/README.md) | +| [MediaPipe-Pose-Estimation](https://aihub.qualcomm.com/models/mediapipe_pose) | [qai_hub_models.models.mediapipe_pose](qai_hub_models/models/mediapipe_pose/README.md) | +| [OpenPose](https://aihub.qualcomm.com/models/openpose) | [qai_hub_models.models.openpose](qai_hub_models/models/openpose/README.md) | +| [Posenet-Mobilenet](https://aihub.qualcomm.com/models/posenet_mobilenet) | [qai_hub_models.models.posenet_mobilenet](qai_hub_models/models/posenet_mobilenet/README.md) | +| [Posenet-Mobilenet-Quantized](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) | [qai_hub_models.models.posenet_mobilenet_quantized](qai_hub_models/models/posenet_mobilenet_quantized/README.md) | +| | | | **Depth Estimation** -| [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | ✔️ | ✔️ | ✔️ -| [Midas-V2-Quantized](https://aihub.qualcomm.com/models/midas_quantized) | 
[qai_hub_models.models.midas_quantized](qai_hub_models/models/midas_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | +| [Midas-V2-Quantized](https://aihub.qualcomm.com/models/midas_quantized) | [qai_hub_models.models.midas_quantized](qai_hub_models/models/midas_quantized/README.md) | ### Audio -| Model | README | Torch App | Device Export | CLI Demo -| -- | -- | -- | -- | -- -| | | | | +| Model | README | +| -- | -- | +| | | | **Speech Recognition** -| [HuggingFace-WavLM-Base-Plus](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus) | [qai_hub_models.models.huggingface_wavlm_base_plus](qai_hub_models/models/huggingface_wavlm_base_plus/README.md) | ✔️ | ✔️ | ✔️ -| [Whisper-Base-En](https://aihub.qualcomm.com/models/whisper_base_en) | [qai_hub_models.models.whisper_base_en](qai_hub_models/models/whisper_base_en/README.md) | ✔️ | ✔️ | ✔️ -| [Whisper-Small-En](https://aihub.qualcomm.com/models/whisper_small_en) | [qai_hub_models.models.whisper_small_en](qai_hub_models/models/whisper_small_en/README.md) | ✔️ | ✔️ | ✔️ -| [Whisper-Tiny-En](https://aihub.qualcomm.com/models/whisper_tiny_en) | [qai_hub_models.models.whisper_tiny_en](qai_hub_models/models/whisper_tiny_en/README.md) | ✔️ | ✔️ | ✔️ +| [HuggingFace-WavLM-Base-Plus](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus) | [qai_hub_models.models.huggingface_wavlm_base_plus](qai_hub_models/models/huggingface_wavlm_base_plus/README.md) | +| [Whisper-Base-En](https://aihub.qualcomm.com/models/whisper_base_en) | [qai_hub_models.models.whisper_base_en](qai_hub_models/models/whisper_base_en/README.md) | +| [Whisper-Small-En](https://aihub.qualcomm.com/models/whisper_small_en) | [qai_hub_models.models.whisper_small_en](qai_hub_models/models/whisper_small_en/README.md) | +| [Whisper-Tiny-En](https://aihub.qualcomm.com/models/whisper_tiny_en) | 
[qai_hub_models.models.whisper_tiny_en](qai_hub_models/models/whisper_tiny_en/README.md) | ### Multimodal -| Model | README | Torch App | Device Export | CLI Demo -| -- | -- | -- | -- | -- -| | | | | -| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ✔️ | ✔️ | ✔️ -| [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | ✔️ | ✔️ | ✔️ +| Model | README | +| -- | -- | +| | | +| [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | +| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ### Generative Ai -| Model | README | Torch App | Device Export | CLI Demo -| -- | -- | -- | -- | -- -| | | | | +| Model | README | +| -- | -- | +| | | | **Image Generation** -| [ControlNet](https://aihub.qualcomm.com/models/controlnet_quantized) | [qai_hub_models.models.controlnet_quantized](qai_hub_models/models/controlnet_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Riffusion](https://aihub.qualcomm.com/models/riffusion_quantized) | [qai_hub_models.models.riffusion_quantized](qai_hub_models/models/riffusion_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Stable-Diffusion-v1.5](https://aihub.qualcomm.com/models/stable_diffusion_v1_5_quantized) | [qai_hub_models.models.stable_diffusion_v1_5_quantized](qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Stable-Diffusion-v2.1](https://aihub.qualcomm.com/models/stable_diffusion_v2_1_quantized) | [qai_hub_models.models.stable_diffusion_v2_1_quantized](qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md) | ✔️ | ✔️ | ✔️ -| | | | | +| [ControlNet](https://aihub.qualcomm.com/models/controlnet_quantized) | 
[qai_hub_models.models.controlnet_quantized](qai_hub_models/models/controlnet_quantized/README.md) | +| [Riffusion](https://aihub.qualcomm.com/models/riffusion_quantized) | [qai_hub_models.models.riffusion_quantized](qai_hub_models/models/riffusion_quantized/README.md) | +| [Stable-Diffusion-v1.5](https://aihub.qualcomm.com/models/stable_diffusion_v1_5_quantized) | [qai_hub_models.models.stable_diffusion_v1_5_quantized](qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md) | +| [Stable-Diffusion-v2.1](https://aihub.qualcomm.com/models/stable_diffusion_v2_1_quantized) | [qai_hub_models.models.stable_diffusion_v2_1_quantized](qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md) | +| | | | **Text Generation** -| [Baichuan2-7B](https://aihub.qualcomm.com/models/baichuan2_7b_quantized) | [qai_hub_models.models.baichuan2_7b_quantized](qai_hub_models/models/baichuan2_7b_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [IBM-Granite-3B-Code-Instruct](https://aihub.qualcomm.com/models/ibm_granite_3b_code_instruct) | [qai_hub_models.models.ibm_granite_3b_code_instruct](qai_hub_models/models/ibm_granite_3b_code_instruct/README.md) | ✔️ | ✔️ | ✔️ -| [IndusQ-1.1B](https://aihub.qualcomm.com/models/indus_1b_quantized) | [qai_hub_models.models.indus_1b_quantized](qai_hub_models/models/indus_1b_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [JAIS-6p7b-Chat](https://aihub.qualcomm.com/models/jais_6p7b_chat_quantized) | [qai_hub_models.models.jais_6p7b_chat_quantized](qai_hub_models/models/jais_6p7b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Llama-v2-7B-Chat](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized) | [qai_hub_models.models.llama_v2_7b_chat_quantized](qai_hub_models/models/llama_v2_7b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Llama-v3-8B-Chat](https://aihub.qualcomm.com/models/llama_v3_8b_chat_quantized) | [qai_hub_models.models.llama_v3_8b_chat_quantized](qai_hub_models/models/llama_v3_8b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ -| 
[Llama-v3.1-8B-Chat](https://aihub.qualcomm.com/models/llama_v3_1_8b_chat_quantized) | [qai_hub_models.models.llama_v3_1_8b_chat_quantized](qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Llama-v3.2-3B-Chat](https://aihub.qualcomm.com/models/llama_v3_2_3b_chat_quantized) | [qai_hub_models.models.llama_v3_2_3b_chat_quantized](qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Mistral-3B](https://aihub.qualcomm.com/models/mistral_3b_quantized) | [qai_hub_models.models.mistral_3b_quantized](qai_hub_models/models/mistral_3b_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Mistral-7B-Instruct-v0.3](https://aihub.qualcomm.com/models/mistral_7b_instruct_v0_3_quantized) | [qai_hub_models.models.mistral_7b_instruct_v0_3_quantized](qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [PLaMo-1B](https://aihub.qualcomm.com/models/plamo_1b_quantized) | [qai_hub_models.models.plamo_1b_quantized](qai_hub_models/models/plamo_1b_quantized/README.md) | ✔️ | ✔️ | ✔️ -| [Qwen2-7B-Instruct](https://aihub.qualcomm.com/models/qwen2_7b_instruct_quantized) | [qai_hub_models.models.qwen2_7b_instruct_quantized](qai_hub_models/models/qwen2_7b_instruct_quantized/README.md) | ✔️ | ✔️ | ✔️ +| [Baichuan2-7B](https://aihub.qualcomm.com/models/baichuan2_7b_quantized) | [qai_hub_models.models.baichuan2_7b_quantized](qai_hub_models/models/baichuan2_7b_quantized/README.md) | +| [IBM-Granite-3B-Code-Instruct](https://aihub.qualcomm.com/models/ibm_granite_3b_code_instruct) | [qai_hub_models.models.ibm_granite_3b_code_instruct](qai_hub_models/models/ibm_granite_3b_code_instruct/README.md) | +| [IndusQ-1.1B](https://aihub.qualcomm.com/models/indus_1b_quantized) | [qai_hub_models.models.indus_1b_quantized](qai_hub_models/models/indus_1b_quantized/README.md) | +| [JAIS-6p7b-Chat](https://aihub.qualcomm.com/models/jais_6p7b_chat_quantized) | 
[qai_hub_models.models.jais_6p7b_chat_quantized](qai_hub_models/models/jais_6p7b_chat_quantized/README.md) | +| [Llama-v2-7B-Chat](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized) | [qai_hub_models.models.llama_v2_7b_chat_quantized](qai_hub_models/models/llama_v2_7b_chat_quantized/README.md) | +| [Llama-v3-8B-Chat](https://aihub.qualcomm.com/models/llama_v3_8b_chat_quantized) | [qai_hub_models.models.llama_v3_8b_chat_quantized](qai_hub_models/models/llama_v3_8b_chat_quantized/README.md) | +| [Llama-v3.1-8B-Chat](https://aihub.qualcomm.com/models/llama_v3_1_8b_chat_quantized) | [qai_hub_models.models.llama_v3_1_8b_chat_quantized](qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md) | +| [Llama-v3.2-3B-Chat](https://aihub.qualcomm.com/models/llama_v3_2_3b_chat_quantized) | [qai_hub_models.models.llama_v3_2_3b_chat_quantized](qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md) | +| [Mistral-3B](https://aihub.qualcomm.com/models/mistral_3b_quantized) | [qai_hub_models.models.mistral_3b_quantized](qai_hub_models/models/mistral_3b_quantized/README.md) | +| [Mistral-7B-Instruct-v0.3](https://aihub.qualcomm.com/models/mistral_7b_instruct_v0_3_quantized) | [qai_hub_models.models.mistral_7b_instruct_v0_3_quantized](qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md) | +| [PLaMo-1B](https://aihub.qualcomm.com/models/plamo_1b_quantized) | [qai_hub_models.models.plamo_1b_quantized](qai_hub_models/models/plamo_1b_quantized/README.md) | +| [Qwen2-7B-Instruct](https://aihub.qualcomm.com/models/qwen2_7b_instruct_quantized) | [qai_hub_models.models.qwen2_7b_instruct_quantized](qai_hub_models/models/qwen2_7b_instruct_quantized/README.md) | + + +## Need help? +Slack: https://aihub.qualcomm.com/community/slack + +GitHub Issues: https://github.com/quic/ai-hub-models/issues + +Email: ai-hub-support@qti.qualcomm.com. + +## LICENSE + +Qualcomm® AI Hub Models is licensed under BSD-3. See the [LICENSE file](LICENSE). 
diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index 54a56de7..13ccee18 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.17.0" +__version__ = "0.18.0" diff --git a/qai_hub_models/datasets/__init__.py b/qai_hub_models/datasets/__init__.py index f916941d..db4e1e3b 100644 --- a/qai_hub_models/datasets/__init__.py +++ b/qai_hub_models/datasets/__init__.py @@ -6,7 +6,7 @@ from .bsd300 import BSD300Dataset from .coco import CocoDataset -from .common import BaseDataset +from .common import BaseDataset, DatasetSplit from .imagenet import ImagenetDataset from .imagenette import ImagenetteDataset from .pascal_voc import VOCSegmentationDataset @@ -24,6 +24,6 @@ } -def get_dataset_from_name(name: str) -> BaseDataset: +def get_dataset_from_name(name: str, split: DatasetSplit) -> BaseDataset: dataset_cls = DATASET_NAME_MAP[name] - return dataset_cls() # type: ignore + return dataset_cls(split=split) # type: ignore diff --git a/qai_hub_models/datasets/bsd300.py b/qai_hub_models/datasets/bsd300.py index a61dec39..34b3fb7a 100644 --- a/qai_hub_models/datasets/bsd300.py +++ b/qai_hub_models/datasets/bsd300.py @@ -5,12 +5,13 @@ from __future__ import annotations import os +from itertools import chain import numpy as np import torch from PIL import Image -from qai_hub_models.datasets.common import BaseDataset +from qai_hub_models.datasets.common import BaseDataset, DatasetSplit from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset BSD300_URL = ( @@ -21,7 +22,8 @@ BSD300_ASSET = CachedWebDatasetAsset( BSD300_URL, BSD300_FOLDER_NAME, BSD300_VERSION, "BSDS300.tgz" ) -DATASET_LENGTH = 200 +NUM_TEST_IMAGES = 100 +NUM_TRAIN_IMAGES = 200 class BSD300Dataset(BaseDataset): @@ -29,55 +31,69 @@ class 
BSD300Dataset(BaseDataset): BSD300 published here: https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/ """ - def __init__(self, scaling_factor=4): + def __init__( + self, + input_height: int = 128, + input_width: int = 128, + scaling_factor: int = 4, + split: DatasetSplit = DatasetSplit.TRAIN, + ): self.bsd_path = BSD300_ASSET.path(extracted=True) - self.images_path = self.bsd_path / "images" / "train" - BaseDataset.__init__(self, self.bsd_path) + + # bsd300 doesn't have a val split, so use the test split for this purpose + split = DatasetSplit.TEST if split == DatasetSplit.VAL else split + + BaseDataset.__init__(self, self.bsd_path, split) self.scaling_factor = scaling_factor + self.input_height = input_height + self.input_width = input_width + self.image_files = sorted(os.listdir(self.images_path)) def _validate_data(self) -> bool: # Check image path exists + self.images_path = self.bsd_path / "images" / self.split_str if not self.images_path.exists(): return False # Ensure the correct number of images are there - images = [f for f in self.images_path.iterdir() if ".jpg" in f.name] - if len(images) != DATASET_LENGTH: + images = [f for f in self.images_path.iterdir() if ".png" in f.name] + expected_num_images = len(self) + if len(images) != expected_num_images: return False return True def _prepare_data(self): - # Rename images to be more friendly to enumeration - # directory = os.path.join(self.dataset_path, "images/train") - # files = os.listdir(directory) - for i, filepath in enumerate(self.images_path.iterdir()): + """Convert jpg to png.""" + train_path = self.bsd_path / "images" / "train" + test_path = self.bsd_path / "images" / "test" + for i, filepath in enumerate(chain(train_path.iterdir(), test_path.iterdir())): if filepath.name.endswith(".jpg"): - # Open the image and convert it to png - try: - with Image.open(filepath) as img: - img.save(self.images_path / f"img_{i + 1:03d}_HR.jpg") - # delete the old image - os.remove(filepath) - except 
ValueError: - print(f"File {filepath} does not exist!") + with Image.open(filepath) as img: + img.save(filepath.parent / f"img_{i + 1:03d}_HR.png") + # delete the old image + os.remove(filepath) def __len__(self): - return DATASET_LENGTH + return NUM_TRAIN_IMAGES if self.split_str == "train" else NUM_TEST_IMAGES def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]: # We use the super resolution GT-and-test image preparation from AIMET zoo: # https://github.com/quic/aimet-model-zoo/blob/d09d2b0404d10f71a7640a87e9d5e5257b028802/aimet_zoo_torch/quicksrnet/dataloader/utils.py#L51 - - img = np.asarray( - Image.open(os.path.join(self.images_path, f"img_{item + 1:03d}_HR.jpg")) + img = Image.open(os.path.join(self.images_path, self.image_files[item])) + img = img.resize( + ( + self.input_width * self.scaling_factor, + self.input_height * self.scaling_factor, + ) ) - height, width = img.shape[0:2] + img_arr = np.asarray(img) + height, width = img_arr.shape[0:2] # If portrait, transpose to landscape so that all tensors are equal size if height > width: - img = np.transpose(img, (1, 0, 2)) - height, width = img.shape[0:2] + img_arr = np.transpose(img_arr, (1, 0, 2)) + height, width = img_arr.shape[0:2] # Take the largest possible center-crop of it such that its dimensions are perfectly divisible by the scaling factor x_remainder = width % ( @@ -94,7 +110,7 @@ def __getitem__(self, item) -> tuple[torch.Tensor, torch.Tensor]: top = int(y_remainder // 2) right = int(left + (width - x_remainder)) bottom = int(top + (height - y_remainder)) - hr_img = img[top:bottom, left:right] + hr_img = img_arr[top:bottom, left:right] hr_height, hr_width = hr_img.shape[0:2] diff --git a/qai_hub_models/datasets/coco.py b/qai_hub_models/datasets/coco.py index e18573e1..e830ff93 100644 --- a/qai_hub_models/datasets/coco.py +++ b/qai_hub_models/datasets/coco.py @@ -2,31 +2,21 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os +from pathlib import Path from typing import Union import torch +import torch.nn.functional as F +from fiftyone.core.sample import SampleView +from PIL import Image from torch.utils.data.dataloader import default_collate -from torchvision.datasets.coco import CocoDetection -from qai_hub_models.datasets.common import BaseDataset -from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset +from qai_hub_models.datasets.common import BaseDataset, DatasetSplit, setup_fiftyone_env from qai_hub_models.utils.image_processing import app_to_net_image_inputs +from qai_hub_models.utils.path_helpers import get_qaihm_package_root DATASET_ID = "coco" DATASET_ASSET_VERSION = 1 -COCO_DATASET = CachedWebDatasetAsset( - "http://images.cocodataset.org/zips/val2017.zip", - DATASET_ID, - DATASET_ASSET_VERSION, - "val2017.zip", -) -COCO_ANNOTATIONS = CachedWebDatasetAsset( - "http://images.cocodataset.org/annotations/annotations_trainval2017.zip", - DATASET_ID, - DATASET_ASSET_VERSION, - "annotations_trainval2017.zip", -) def collate_fn(batch): @@ -45,81 +35,130 @@ def collate_fn(batch): new_list.append(target) return new_list except Exception: - return [], ([], [], [], [], []) + return [], ([], [], [], [], [], []) -class CocoDataset(BaseDataset, CocoDetection): +class CocoDataset(BaseDataset): """ - Class for using the COCODetection dataset published here: + Wrapper class around COCO dataset https://cocodataset.org/ + Contains object detection samples and labels spanning 80 classes. - Contains ~5k images spanning 80 classes. + This wrapper supports the train and val splits of the 2017 version. 
""" - def __init__(self, target_image_size: Union[int, tuple[int, int]] = 640): - BaseDataset.__init__(self, str(COCO_DATASET.path(extracted=True))) - CocoDetection.__init__( - self, - root=COCO_DATASET.path() / "val2017", - annFile=COCO_ANNOTATIONS.path() / "annotations" / "instances_val2017.json", - ) + def __init__( + self, + target_image_size: Union[int, tuple[int, int]] = 640, + split: DatasetSplit = DatasetSplit.TRAIN, + max_boxes: int = 100, + num_samples: int = 5000, + ): + """ + Parameters: + target_image_size: The size to which the input images will be resized. + split: Whether to use the train or val split of the dataset. + max_boxes: The maximum number of boxes for a given sample. Used so that + when loading multiple samples in a batch via a dataloader, this will + be the tensor dimension. + + If a sample has fewer than this many boxes, the tensor of boxes + will be zero padded up to this amount. + + If a sample has more than this many boxes, an exception is thrown. + num_samples: Number of data samples to download. Needs to be specified + during initialization because only as many samples as requested + are downloaded. 
+ """ + self.num_samples = num_samples + + # FiftyOne package manages dataset so pass a dummy name for data path + BaseDataset.__init__(self, "non_existent_dir", split) - categories = self.coco.loadCats(self.coco.getCatIds()) - categories.sort(key=lambda x: x["id"]) - self.label_map = {} counter = 0 - for c in categories: - self.label_map[c["id"]] = counter - counter += 1 + self.label_map = {} + with open(get_qaihm_package_root() / "labels" / "coco_labels.txt") as f: + for line in f.readlines(): + self.label_map[line.strip()] = counter + counter += 1 + self.target_image_size = ( target_image_size if isinstance(target_image_size, tuple) else (target_image_size, target_image_size) ) + self.max_boxes = max_boxes def __getitem__(self, item): - image, target = super().__getitem__(item) + """ + Returns a tuple of input image tensor and label data. + + Label data is a tuple with the following entries: + - Image ID within the original dataset + - height (in pixels) + - width (in pixels) + - bounding box data with shape (self.max_boxes, 4) + - The 4 should be normalized (x, y, w, h) + - labels with shape (self.max_boxes,) + - number of actual boxes present + """ + sample = self.dataset[item : item + 1].first() + assert isinstance(sample, SampleView) + image = Image.open(sample.filepath).convert("RGB") width, height = image.size boxes = [] labels = [] - for annotation in target: - bbox = annotation.get("bbox") - boxes.append( - [ - bbox[0] / width, - bbox[1] / height, - (bbox[0] + bbox[2]) / width, - (bbox[1] + bbox[3]) / height, - ] - ) - labels.append(self.label_map[annotation.get("category_id")]) + if sample.ground_truth is not None: + for annotation in sample.ground_truth.detections: + if annotation.label not in self.label_map: + print(f"Warning: Invalid label {annotation.label}") + continue + x, y, w, h = annotation.bounding_box + boxes.append([x, y, x + w, y + h]) + # Convert string label to int idx + labels.append(self.label_map[annotation.label]) boxes = 
torch.tensor(boxes) labels = torch.tensor(labels) + + # Pad the number of boxes to a standard value + num_boxes = len(labels) + if num_boxes == 0: + boxes = torch.zeros((100, 4)) + labels = torch.zeros(100) + elif num_boxes > self.max_boxes: + raise ValueError( + f"Sample has more boxes than max boxes {self.max_boxes}. " + "Re-initialize the dataset with a larger value for max_boxes." + ) + else: + boxes = F.pad(boxes, (0, 0, 0, self.max_boxes - num_boxes), value=0) + labels = F.pad(labels, (0, self.max_boxes - num_boxes), value=0) + image = image.resize(self.target_image_size) image = app_to_net_image_inputs(image)[1].squeeze(0) return image, ( - target[0]["image_id"] if len(target) > 0 else 0, + int(Path(sample.filepath).name[:-4]), height, width, boxes, labels, + torch.tensor([num_boxes]), ) - def _validate_data(self) -> bool: - # Check validation data exists - if not (COCO_DATASET.path() / "val2017").exists(): - return False + def __len__(self) -> int: + return len(self.dataset) - # Check annotations exist - if not COCO_ANNOTATIONS.path().exists(): - return False + def _validate_data(self) -> bool: + return hasattr(self, "dataset") - # Ensure there are 5000 samples - if len(os.listdir(COCO_DATASET.path() / "val2017")) < 5000: - return False + def _download_data(self) -> None: + setup_fiftyone_env() - return True + # This is an expensive import, so don't want to unnecessarily import it in + # other files that import datasets/__init__.py + import fiftyone.zoo as foz - def _download_data(self) -> None: - COCO_DATASET.fetch(extract=True) - COCO_ANNOTATIONS.fetch(extract=True) + split_str = "validation" if self.split == DatasetSplit.VAL else "train" + self.dataset = foz.load_zoo_dataset( + "coco-2017", split=split_str, max_samples=self.num_samples, shuffle=True + ) diff --git a/qai_hub_models/datasets/common.py b/qai_hub_models/datasets/common.py index 7ec43093..4f31bec9 100644 --- a/qai_hub_models/datasets/common.py +++ b/qai_hub_models/datasets/common.py @@ -7,19 
+7,37 @@ import os import shutil from abc import ABC, abstractmethod +from enum import Enum, unique from pathlib import Path from typing import final from torch.utils.data import Dataset +from qai_hub_models.utils.asset_loaders import LOCAL_STORE_DEFAULT_PATH + + +@unique +class DatasetSplit(Enum): + """ + Distinct splits of the dataset should be used for training vs. validation. + + This enum can be set during dataset initialization to indicate which split to use. + """ + + TRAIN = 0 + VAL = 1 + TEST = 2 + class BaseDataset(Dataset, ABC): """ Base class to be extended by Datasets used in this repo for quantizing models. """ - def __init__(self, dataset_path: str | Path): + def __init__(self, dataset_path: str | Path, split: DatasetSplit): self.dataset_path = Path(dataset_path) + self.split = split + self.split_str = split.name.lower() self.download_data() @final @@ -59,3 +77,26 @@ def dataset_name(cls) -> str: which by default is set to the filename where the class is defined. """ return cls.__module__.split(".")[-1] + + +def setup_fiftyone_env(): + """ + FiftyOne is an external library that provides utilities for downloading and storing + datasets. We want all of its operations to be done within the ai-hub-models cache + directory. + + Import within the function so it only happens when the function is called. + """ + try: + import fiftyone as fo + except (ImportError, ModuleNotFoundError): + raise ImportError( + "This dataset requires the `fiftyone` module. " + "Run `pip install fiftyone==1.0.1` to use this dataset." 
+ ) + + fiftyone_dir = os.path.join(LOCAL_STORE_DEFAULT_PATH, "fiftyone") + fo.config.database_dir = os.path.join(fiftyone_dir, "mongo") + fo.config.dataset_zoo_dir = fiftyone_dir + fo.config.default_dataset_dir = fiftyone_dir + fo.config.model_zoo_dir = os.path.join(fiftyone_dir, "__models__") diff --git a/qai_hub_models/datasets/imagenet.py b/qai_hub_models/datasets/imagenet.py index 96d8ff71..ceb6512e 100644 --- a/qai_hub_models/datasets/imagenet.py +++ b/qai_hub_models/datasets/imagenet.py @@ -7,7 +7,7 @@ from torchvision.datasets import ImageNet -from qai_hub_models.datasets.common import BaseDataset +from qai_hub_models.datasets.common import BaseDataset, DatasetSplit from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset from qai_hub_models.utils.image_processing import IMAGENET_TRANSFORM @@ -40,7 +40,7 @@ class ImagenetDataset(BaseDataset, ImageNet): Wrapper class for using the Imagenet validation dataset: https://www.image-net.org/ """ - def __init__(self): + def __init__(self, split: DatasetSplit = DatasetSplit.VAL): """ A direct download link for the validation set is not available. Users should download the validation dataset manually and pass the local filepath @@ -49,16 +49,18 @@ def __init__(self): input_data_path: Local filepath to imagenet validation set. 
""" - BaseDataset.__init__(self, IMAGENET_ASSET.path().parent) + if split != DatasetSplit.VAL: + raise ValueError("Imagenet dataset currently only supports `val` split") + BaseDataset.__init__(self, IMAGENET_ASSET.path().parent, split) ImageNet.__init__( self, - root=self.dataset_path, - split="val", + root=str(self.dataset_path), + split=self.split_str, transform=IMAGENET_TRANSFORM, ) def _validate_data(self) -> bool: - val_path = self.dataset_path / "val" + val_path = self.dataset_path / self.split_str if not (self.dataset_path / DEVKIT_NAME).exists(): print("Missing Devkit.") return False @@ -78,7 +80,7 @@ def _validate_data(self) -> bool: return True def _download_data(self) -> None: - val_path = self.dataset_path / "val" + val_path = self.dataset_path / self.split_str os.makedirs(val_path, exist_ok=True) IMAGENET_ASSET.fetch(extract=True) diff --git a/qai_hub_models/datasets/imagenette.py b/qai_hub_models/datasets/imagenette.py index f9b92fa6..4f46f7c7 100644 --- a/qai_hub_models/datasets/imagenette.py +++ b/qai_hub_models/datasets/imagenette.py @@ -7,7 +7,7 @@ from torchvision.datasets import ImageNet -from qai_hub_models.datasets.common import BaseDataset +from qai_hub_models.datasets.common import BaseDataset, DatasetSplit from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset from qai_hub_models.utils.image_processing import IMAGENET_TRANSFORM @@ -51,12 +51,14 @@ class ImagenetteDataset(BaseDataset, ImageNet): Contains ~4k images spanning 10 of the imagenet classes. 
""" - def __init__(self): - BaseDataset.__init__(self, str(IMAGENETTE_ASSET.path(extracted=True))) + def __init__(self, split: DatasetSplit = DatasetSplit.TRAIN): + BaseDataset.__init__( + self, str(IMAGENETTE_ASSET.path(extracted=True)), split=split + ) ImageNet.__init__( self, - root=IMAGENETTE_ASSET.path(), - split="val", + root=str(IMAGENETTE_ASSET.path()), + split=self.split_str, transform=IMAGENET_TRANSFORM, target_transform=lambda val: IMAGENETTE_CLASS_MAP[val], ) @@ -74,7 +76,7 @@ def _validate_data(self) -> bool: return False # Check val data exists - val_data_path = self.dataset_path / "val" + val_data_path = self.dataset_path / self.split_str if not val_data_path.exists(): return False diff --git a/qai_hub_models/datasets/pascal_voc.py b/qai_hub_models/datasets/pascal_voc.py index 00252ef6..009a10f4 100644 --- a/qai_hub_models/datasets/pascal_voc.py +++ b/qai_hub_models/datasets/pascal_voc.py @@ -3,13 +3,12 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- - import numpy as np import torch from PIL import Image from torchvision import transforms -from qai_hub_models.datasets.common import BaseDataset +from qai_hub_models.datasets.common import BaseDataset, DatasetSplit from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset VOC_FOLDER_NAME = "voc" @@ -29,10 +28,16 @@ class VOCSegmentationDataset(BaseDataset): https://host.robots.ox.ac.uk/pascal/VOC/voc2012/ """ - def __init__(self, split: str = "train", image_size: tuple[int, int] = (224, 224)): - BaseDataset.__init__(self, str(VOC_ASSET.path().parent / DEVKIT_FOLDER_NAME)) - assert split in ["train", "val", "trainval"] - self.split = split + def __init__( + self, + split: DatasetSplit = DatasetSplit.TRAIN, + input_height: int = 520, + input_width: int = 520, + ): + BaseDataset.__init__( + self, str(VOC_ASSET.path().parent / DEVKIT_FOLDER_NAME), split + ) + assert self.split_str in ["train", "val", "trainval"] base_path = 
self.dataset_path / "VOC2012" image_dir = base_path / "JPEGImages" @@ -43,7 +48,7 @@ def __init__(self, split: str = "train", image_size: tuple[int, int] = (224, 224 self.images = [] self.categories = [] - with open(splits_dir / (split + ".txt")) as f: + with open(splits_dir / (self.split_str + ".txt")) as f: lines = f.read().splitlines() for line in lines: @@ -55,17 +60,20 @@ def __init__(self, split: str = "train", image_size: tuple[int, int] = (224, 224 self.images.append(image_path) self.categories.append(category_path) - self.image_size = image_size + self.input_height = input_height + self.input_width = input_width self.image_transform = transforms.Compose( [ - transforms.Resize(image_size), + transforms.Resize((self.input_height, self.input_width)), transforms.ToTensor(), ] ) def __getitem__(self, index): img = self.image_transform(Image.open(self.images[index]).convert("RGB")) - target_img = Image.open(self.categories[index]).resize(self.image_size[::-1]) + target_img = Image.open(self.categories[index]).resize( + (self.input_width, self.input_height) + ) target = torch.from_numpy(np.array(target_img)).float() return img, target diff --git a/qai_hub_models/evaluators/base_evaluators.py b/qai_hub_models/evaluators/base_evaluators.py index 1df96140..a6624f89 100644 --- a/qai_hub_models/evaluators/base_evaluators.py +++ b/qai_hub_models/evaluators/base_evaluators.py @@ -29,7 +29,7 @@ class BaseEvaluator(ABC): def add_batch( self, output, # torch.Tensor | Collection[torch.Tensor] - ground_truth, # torch.Tensor | Collection[torch.Tensor] + gt, # torch.Tensor | Collection[torch.Tensor] ) -> None: """ Add a batch of data to this evaluator. 
diff --git a/qai_hub_models/evaluators/detection_evaluator.py b/qai_hub_models/evaluators/detection_evaluator.py index bd0e0800..a3ea7d0c 100644 --- a/qai_hub_models/evaluators/detection_evaluator.py +++ b/qai_hub_models/evaluators/detection_evaluator.py @@ -33,76 +33,73 @@ def __init__( self.scale_x = 1 / image_height self.scale_y = 1 / image_width - def add_batch( - self, - output: Collection[torch.Tensor], - gt: Collection[torch.Tensor], - ): - # This evaluator supports 1 output tensor at a time. - image_id, _, _, bboxes, classes = gt + def add_batch(self, output: Collection[torch.Tensor], gt: Collection[torch.Tensor]): + """ + gt should be a tuple of tensors with the following tensors: + - image_ids of shape (batch_size,) + - image heights of shape (batch_size,) + - image widths of shape (batch_size,) + - bounding boxes of shape (batch_size, max_boxes, 4) + - The 4 should be normalized (x, y, w, h) + - classes of shape (batch_size, max_boxes) + - num nonzero boxes for each sample of shape (batch_size,) + + output should be a tuple of tensors with the following tensors: + - bounding boxes with shape (batch_size, num_candidate_boxes, 4) + - The 4 should be normalized (x, y, w, h) + - scores with shape (batch_size, num_candidate_boxes) + - class predictions with shape (batch_size, num_candidate_boxes) + """ + image_ids, _, _, all_bboxes, all_classes, all_num_boxes = gt pred_boxes, pred_scores, pred_class_idx = output - if bboxes.numel() == 0: - return - - # The number of boxes can be variable, so dataloader doesn't like shapes - # mismatching across samples in the batch. - assert bboxes.shape[0] == 1, "Detection evaluator only supports batch size 1." - bboxes = bboxes.squeeze(0) - classes = classes.squeeze(0) - - # Seeing memory issues, initentionally deleting these variables to free memory. 
- del gt - del output - - # Reuse NMS utility - ( - after_nms_pred_boxes, - after_nms_pred_scores, - after_nms_pred_class_idx, - ) = batched_nms( - self.nms_iou_threshold, - self.nms_score_threshold, - pred_boxes, - pred_scores, - pred_class_idx, - ) - - del pred_boxes - del pred_scores - del pred_class_idx - - # Collect GT and prediction boxes - gt_bb_entry = [ - BoundingBox.of_bbox(image_id, cat, *bbox, 1.0) - for cat, bbox in zip(classes.tolist(), bboxes.tolist()) - ] - del classes - del bboxes - - pd_bb_entry = [ - BoundingBox.of_bbox( - image_id, - pred_cat, - pred_bbox[0] * self.scale_x, - pred_bbox[1] * self.scale_y, - pred_bbox[2] * self.scale_x, - pred_bbox[3] * self.scale_y, - pred_score, - ) - for pred_cat, pred_score, pred_bbox in zip( - after_nms_pred_class_idx[0].tolist(), - after_nms_pred_scores[0].tolist(), - after_nms_pred_boxes[0].tolist(), + for i in range(len(image_ids)): + image_id = image_ids[i] + bboxes = all_bboxes[i][: all_num_boxes[i].item()] + classes = all_classes[i][: all_num_boxes[i].item()] + if bboxes.numel() == 0: + continue + + # Reuse NMS utility + ( + after_nms_pred_boxes, + after_nms_pred_scores, + after_nms_pred_class_idx, + ) = batched_nms( + self.nms_iou_threshold, + self.nms_score_threshold, + pred_boxes[i : i + 1], + pred_scores[i : i + 1], + pred_class_idx[i : i + 1], ) - ] - - del after_nms_pred_boxes - del after_nms_pred_scores - del after_nms_pred_class_idx - # Compute mean average precision - self._update_mAP(gt_bb_entry, pd_bb_entry) + # Collect GT and prediction boxes + gt_bb_entry = [ + BoundingBox.of_bbox( + image_id, cat, bbox[0], bbox[1], bbox[2], bbox[3], 1.0 + ) + for cat, bbox in zip(classes.tolist(), bboxes.tolist()) + ] + + pd_bb_entry = [ + BoundingBox.of_bbox( + image_id, + pred_cat, + pred_bbox[0] * self.scale_x, + pred_bbox[1] * self.scale_y, + pred_bbox[2] * self.scale_x, + pred_bbox[3] * self.scale_y, + pred_score, + ) + for pred_cat, pred_score, pred_bbox in zip( + 
after_nms_pred_class_idx[0].tolist(), + after_nms_pred_scores[0].tolist(), + after_nms_pred_boxes[0].tolist(), + ) + ] + + # Compute mean average precision + self._update_mAP(gt_bb_entry, pd_bb_entry) def reset(self): self.gt_bb = [] @@ -113,8 +110,6 @@ def _update_mAP(self, gt_bb_entry, pd_bb_entry): self.gt_bb += gt_bb_entry self.pd_bb += pd_bb_entry - del gt_bb_entry - del pd_bb_entry self.results = get_pascal_voc_metrics( self.gt_bb, self.pd_bb, self.nms_iou_threshold ) @@ -124,4 +119,4 @@ def get_accuracy_score(self): return self.mAP def formatted_accuracy(self) -> str: - return f"{self.get_accuracy_score()} mAP" + return f"{self.get_accuracy_score():.3f} mAP" diff --git a/qai_hub_models/evaluators/segmentation_evaluator.py b/qai_hub_models/evaluators/segmentation_evaluator.py index 212e26f6..d71fe36b 100644 --- a/qai_hub_models/evaluators/segmentation_evaluator.py +++ b/qai_hub_models/evaluators/segmentation_evaluator.py @@ -18,7 +18,7 @@ def __init__(self, num_classes): def add_batch(self, output: torch.Tensor, gt: torch.Tensor): # This evaluator supports only 1 output tensor at a time. 
- output = output.argmax(1).cpu() + output = output.cpu() assert gt.shape == output.shape self.confusion_matrix += self._generate_matrix(gt, output) diff --git a/qai_hub_models/evaluators/superres_evaluator.py b/qai_hub_models/evaluators/superres_evaluator.py index f09cf57d..88f3dc5d 100644 --- a/qai_hub_models/evaluators/superres_evaluator.py +++ b/qai_hub_models/evaluators/superres_evaluator.py @@ -51,9 +51,9 @@ def add_batch(self, output: torch.Tensor, gt: torch.Tensor): def reset(self): self.psnr_list = [] - def compute_average_psnr(self): + def compute_average_psnr(self) -> float: average_psnr = np.mean(np.array(self.psnr_list)) - return average_psnr + return float(average_psnr) def get_accuracy_score(self) -> float: return self.compute_average_psnr() diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt index f8762bad..9f44e9ea 100644 --- a/qai_hub_models/global_requirements.txt +++ b/qai_hub_models/global_requirements.txt @@ -15,9 +15,11 @@ boto3>=1.34,<1.36 botocore>=1.34,<1.36 data-gradients==0.3.1 datasets==2.14.5 -diffusers[torch]==0.21.4 +diffusers[torch]==0.31.0 easydict==1.13 einops==0.3.2 +ffmpeg==1.4 +fiftyone==1.0.1 ftfy==6.1.1 gdown==4.7.1 gitpython==3.1.42 @@ -40,6 +42,7 @@ mypy==1.13.0 numpy>=1.23.5,< 2 # 1.23.5 required by AIMET object-detection-metrics==0.4.post1 onnx>=1.14.1,<1.17 # ONNX must be at least 1.14.1. AIMET-torch and AIMET-ONNX use different ONNX versions. +onnxruntime<1.20.0 onnxsim<=0.4.36 openai-whisper==20231117 opencv-python>4,<5 @@ -65,6 +68,7 @@ scikit-image>0.21.0,<0.25 scikit-learn>1.1,<1.6 scipy>=1.8.1,<2 # 1.8.1 is for AIMET seaborn==0.11.0 +segment-anything==1.0 sentencepiece==0.2.0 shapely==2.0.3 soundfile==0.12.1 @@ -77,6 +81,7 @@ thop==0.1.1.post2209072238 timm==1.0.3 torch>=2.1.2,<2.5.0 # 2.1.2 is for AIMET. 2.5 won't work with torchvision yet. 
torchmetrics==1.4.0.post0 +torchpack==0.3.1 torchvision>=0.16.2,<0.21 tqdm>=4.66 transformers==4.41.1 diff --git a/qai_hub_models/models/_shared/body_detection/app.py b/qai_hub_models/models/_shared/body_detection/app.py index 2bceaab1..a467e314 100644 --- a/qai_hub_models/models/_shared/body_detection/app.py +++ b/qai_hub_models/models/_shared/body_detection/app.py @@ -140,7 +140,7 @@ def detect(self, imgfile: str, height: int, width: int, conf: float) -> np.ndarr (cls_id, x1, y1, x2, y2, score) """ img = np.array(load_image(imgfile)) - img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0) + img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0) / 255.0 input, scale, pad = resize_pad(img, (height, width)) output = self.model(input) for t, o in enumerate(output): diff --git a/qai_hub_models/models/_shared/body_detection/model.py b/qai_hub_models/models/_shared/body_detection/model.py index 27f66da3..f37fa856 100644 --- a/qai_hub_models/models/_shared/body_detection/model.py +++ b/qai_hub_models/models/_shared/body_detection/model.py @@ -510,7 +510,6 @@ def forward(self, x: torch.Tensor) -> list[torch.Tensor]: Outputs: list[torch.Tensor] Multi-scale object detection output. 
""" - x = x / 255.0 y = [] for m in self.model: if m.f != -1: diff --git a/qai_hub_models/models/_shared/deeplab/app.py b/qai_hub_models/models/_shared/deeplab/app.py index 7a3a5221..eb954f90 100644 --- a/qai_hub_models/models/_shared/deeplab/app.py +++ b/qai_hub_models/models/_shared/deeplab/app.py @@ -70,7 +70,7 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: input_tensor = preprocess_image(image) output = self.model(input_tensor) output = output[0] - predictions = output.argmax(0).byte().cpu().numpy() + predictions = output.cpu().numpy() if raw_output: return predictions diff --git a/qai_hub_models/models/_shared/deeplab/model.py b/qai_hub_models/models/_shared/deeplab/model.py index 77c5a42a..17e06b31 100644 --- a/qai_hub_models/models/_shared/deeplab/model.py +++ b/qai_hub_models/models/_shared/deeplab/model.py @@ -41,13 +41,16 @@ def forward(self, image): """ if self.normalize_input: image = normalize_image_torchvision(image) - return self.model(image) + model_out = self.model(image) + if isinstance(model_out, dict): + model_out = model_out["out"] + return model_out.argmax(1).byte() @staticmethod def get_input_spec( batch_size: int = 1, - height: int = 513, - width: int = 513, + height: int = 520, + width: int = 520, ) -> InputSpec: # Get the input specification ordered (name -> (shape, type)) pairs for this model. 
# @@ -62,7 +65,3 @@ def get_output_names() -> list[str]: @staticmethod def get_channel_last_inputs() -> list[str]: return ["image"] - - @staticmethod - def get_channel_last_outputs() -> list[str]: - return ["mask"] diff --git a/qai_hub_models/models/_shared/llama3/app.py b/qai_hub_models/models/_shared/llama3/app.py index c6c8536a..ae813c9b 100644 --- a/qai_hub_models/models/_shared/llama3/app.py +++ b/qai_hub_models/models/_shared/llama3/app.py @@ -154,7 +154,7 @@ def generate_output_prompt( # Shift attention_mask and position_ids attention_mask = torch.cat( - (attention_mask[:, seq_len:], torch.zeros((1, seq_len))), dim=-1 + (attention_mask[:, seq_len:], torch.ones((1, seq_len))), dim=-1 ) position_ids = (position_ids[:, -1] + 1).reshape(-1, 1) diff --git a/qai_hub_models/models/_shared/llama3/export.py b/qai_hub_models/models/_shared/llama3/export.py index 762fc77a..346dd8cb 100644 --- a/qai_hub_models/models/_shared/llama3/export.py +++ b/qai_hub_models/models/_shared/llama3/export.py @@ -347,6 +347,10 @@ def export_model( torch_out, ) + print( + "These models can be deployed on-device using the Genie SDK. For a full tutorial, please follow the instructions here: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie." 
+ ) + return { sub_component_name: ( link_jobs[component_name], diff --git a/qai_hub_models/models/_shared/llama3/split_onnx_utils/split_onnx.py b/qai_hub_models/models/_shared/llama3/split_onnx_utils/split_onnx.py index 45607848..1e75e2cd 100644 --- a/qai_hub_models/models/_shared/llama3/split_onnx_utils/split_onnx.py +++ b/qai_hub_models/models/_shared/llama3/split_onnx_utils/split_onnx.py @@ -142,7 +142,7 @@ def split(self, list_of_intermediate_output_tensors): covered_output_tensors.update([i.name for i in subgraph.output]) yield subgraph - graphname = f"{self.model.graph.name}_split{count+1}" + graphname = f"{self.model.graph.name}_split{count + 1}" last_output_tensors = [ i.name for i in self.model.graph.output @@ -204,7 +204,7 @@ def split_onnx_by_names( # 1. split model new_model_info = [] for i, subgraph in enumerate(splitter.split(list_of_output_tensors)): - new_basename = f"{os.path.basename(onnxfile)}_{i+1}_of_{num_splits}" + new_basename = f"{os.path.basename(onnxfile)}_{i + 1}_of_{num_splits}" input_tensors = [i.name for i in subgraph.input] new_model_info.append([new_basename, input_tensors]) diff --git a/qai_hub_models/models/_shared/llama3/split_onnx_utils/utils.py b/qai_hub_models/models/_shared/llama3/split_onnx_utils/utils.py index 90c0cfe1..3dd0a09f 100644 --- a/qai_hub_models/models/_shared/llama3/split_onnx_utils/utils.py +++ b/qai_hub_models/models/_shared/llama3/split_onnx_utils/utils.py @@ -224,7 +224,7 @@ def split_onnx_by_names( # 1. 
split model new_model_info = [] for i, subgraph in enumerate(splitter.split(list_of_output_tensors)): - new_basename = f"{modelname}_{i+1}_of_{num_splits}" + new_basename = f"{modelname}_{i + 1}_of_{num_splits}" input_tensor_names = [i.name for i in subgraph.input] output_tensor_names = [i.name for i in subgraph.output] new_model_info.append([new_basename, input_tensor_names, output_tensor_names]) diff --git a/qai_hub_models/models/aotgan/README.md b/qai_hub_models/models/aotgan/README.md index c3b8a37f..0b577a8a 100644 --- a/qai_hub_models/models/aotgan/README.md +++ b/qai_hub_models/models/aotgan/README.md @@ -5,8 +5,7 @@ AOT-GAN is a machine learning model that allows to erase and in-paint part of given input image. -This is based on the implementation of AOT-GAN found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/aotgan). 
diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index 452662ba..a3cd33d2 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 153624.0 - throughput: 6.509399572983388 + inference_time: 152763.0 + throughput: 6.546087730667766 estimated_peak_memory_range: - min: 3317760 - max: 5613240 + min: 3321856 + max: 5803656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j56yz2dyp + job_id: jp14dm2np job_status: Passed torchscript_onnx_qnn: - inference_time: 153241.0 - throughput: 6.525668717901867 + inference_time: 153270.0 + throughput: 6.524434005350036 estimated_peak_memory_range: - min: 3387392 - max: 26771528 + min: 4255744 + max: 22606960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jp14w2v7p + job_id: j5mnwleyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:36:09Z' + timestamp: '2024-11-09T23:23:31Z' - torchscript_onnx_tflite: - inference_time: 153075.0 - throughput: 6.532745386248571 + inference_time: 112210.0 + throughput: 8.911861687906603 estimated_peak_memory_range: - min: 4894720 - max: 6924200 + min: 16384 + max: 225636288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp3j1nwng + 
job_id: jgdxrmn6p job_status: Passed torchscript_onnx_qnn: - inference_time: 153349.0 - throughput: 6.521072846904773 + inference_time: 112615.0 + throughput: 8.879811747990942 estimated_peak_memory_range: - min: 147456 - max: 27697056 + min: 4304896 + max: 67777808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,22 +108,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jgdxqnzzp + job_id: jgn69wlv5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:36:10Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:23:32Z' - torchscript_onnx_tflite: - inference_time: 138824.0 - throughput: 7.203365412320636 + inference_time: 118640.0 + throughput: 8.428860418071476 estimated_peak_memory_range: - min: 2904064 - max: 225129600 + min: 8192 + max: 87520432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgo2nz4kp + job_id: j5wedrwz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 120375.0 - throughput: 8.307372793354102 + inference_time: 118610.0 + throughput: 8.430992327796982 estimated_peak_memory_range: - min: 141524992 - max: 205006736 + min: 3158016 + max: 68517904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,22 +146,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: j5we3w145 + job_id: jprv478vg job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:36:11Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:23:34Z' - torchscript_onnx_tflite: - inference_time: 118202.0 - 
throughput: 8.460093737838616 + inference_time: 149861.0 + throughput: 6.672850174495032 estimated_peak_memory_range: - min: 1138688 - max: 88057088 + min: 3301376 + max: 5657312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +169,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jg9ly0x8g + job_id: jg9l3q0qg job_status: Passed torchscript_onnx_qnn: - inference_time: 98705.0 - throughput: 10.131199027404893 + inference_time: 92085.0 + throughput: 10.859531954172775 estimated_peak_memory_range: - min: 4255744 - max: 69378192 + min: 4493312 + max: 5644472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,22 +184,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: j5mn6ed7p + job_id: jp2k7z0xp job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:36:19Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:23:35Z' - torchscript_onnx_tflite: - inference_time: 152879.0 - throughput: 6.54112075563027 + inference_time: 153081.0 + throughput: 6.532489335711159 estimated_peak_memory_range: - min: 1642496 - max: 3489136 + min: 3387392 + max: 5609568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,14 +207,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jpv6rq9r5 + job_id: jp14dm2kp job_status: Passed torchscript_onnx_qnn: - inference_time: 93485.0 - throughput: 10.696903246510136 + inference_time: 92570.0 + throughput: 10.802635843145728 estimated_peak_memory_range: - min: 4444160 - max: 6194160 + min: 4440064 + max: 5711400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,22 +222,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jg9ly0xmg + job_id: jp0z1x325 job_status: Passed reference_device_info: - name: QCS8550 
(Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:36:12Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:23:37Z' - torchscript_onnx_tflite: - inference_time: 153002.0 - throughput: 6.535862276310113 + inference_time: 152952.0 + throughput: 6.537998849312203 estimated_peak_memory_range: - min: 3284992 - max: 5390224 + min: 3317760 + max: 5373984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,14 +245,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jg9ly17vg + job_id: jgdxrmnkp job_status: Passed torchscript_onnx_qnn: - inference_time: 92540.0 - throughput: 10.80613788631943 + inference_time: 92932.0 + throughput: 10.760556105539534 estimated_peak_memory_range: - min: 4263936 - max: 5515224 + min: 4485120 + max: 5751352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -258,22 +260,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: j5we3vk65 + job_id: jp8q3k0zp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T02:00:37Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:23:38Z' - torchscript_onnx_tflite: - inference_time: 153237.0 - throughput: 6.525839059757108 + inference_time: 152762.0 + throughput: 6.546130582212854 estimated_peak_memory_range: - min: 1159168 - max: 2905296 + min: 3887104 + max: 6020648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -281,14 +283,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp14wlklp + job_id: j57yj82q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 92951.0 - throughput: 10.758356553452895 + inference_time: 92470.0 + throughput: 10.814318157240185 estimated_peak_memory_range: - min: 4337664 - max: 8812088 + min: 
5095424 + max: 6341544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -296,22 +298,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jg9ly1rlg + job_id: jgkelk7yg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T02:00:38Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:23:39Z' - torchscript_onnx_tflite: - inference_time: 152765.0 - throughput: 6.546002029260629 + inference_time: 218814.0 + throughput: 4.570091493231694 estimated_peak_memory_range: - min: 3309568 - max: 5720616 + min: 3313664 + max: 69013616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -319,14 +321,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgdxq9ylp + job_id: jp4lx2nq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 92654.0 - throughput: 10.792842187061542 + inference_time: 164324.0 + throughput: 6.085538326720382 estimated_peak_memory_range: - min: 4444160 - max: 6201632 + min: 1261568 + max: 7102224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -334,22 +336,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jp14wl92p + job_id: j5q67de7p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T02:00:39Z' + chipset: SA8295P + timestamp: '2024-11-09T23:23:40Z' - torchscript_onnx_tflite: - inference_time: 193876.0 - throughput: 5.157936000330108 + inference_time: 194495.0 + throughput: 5.141520347566775 estimated_peak_memory_range: - min: 3248128 - max: 195376304 + min: 1376256 + max: 193474944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -357,14 +359,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j5we3w1m5 + job_id: 
jpxk7z9j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 194886.0 - throughput: 5.131204909536858 + inference_time: 196670.0 + throughput: 5.084659582040983 estimated_peak_memory_range: - min: 4214784 - max: 52661840 + min: 3162112 + max: 51371952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -372,7 +374,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jpxk69d85 + job_id: jglv0q6e5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -381,10 +383,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:36:18Z' + timestamp: '2024-11-09T23:23:41Z' - torchscript_onnx_qnn: - inference_time: 96261.0 - throughput: 10.388423141251389 + inference_time: 96297.0 + throughput: 10.384539497595979 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -395,7 +397,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jp14w2vnp + job_id: jpy14yrrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -404,4 +406,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:36:14Z' + timestamp: '2024-11-09T23:23:36Z' diff --git a/qai_hub_models/models/baichuan2_7b_quantized/README.md b/qai_hub_models/models/baichuan2_7b_quantized/README.md index 41a9b966..b38a0ff0 100644 --- a/qai_hub_models/models/baichuan2_7b_quantized/README.md +++ b/qai_hub_models/models/baichuan2_7b_quantized/README.md @@ -5,8 +5,7 @@ Baichuan2-7B is a family of LLMs. It achieves the state-of-the-art performance of its size on standard Chinese and English authoritative benchmarks (C-EVAL/MMLU). 4-bit weights and 16-bit activations making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Baichuan2-PromptProcessor-Quantized's latency and average time per addition token is Baichuan2-TokenGenerator-Quantized's latency. 
-This is based on the implementation of Baichuan2-7B found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/baichuan2_7b_quantized). diff --git a/qai_hub_models/models/controlnet_quantized/README.md b/qai_hub_models/models/controlnet_quantized/README.md index 08c419b4..8370f32c 100644 --- a/qai_hub_models/models/controlnet_quantized/README.md +++ b/qai_hub_models/models/controlnet_quantized/README.md @@ -5,8 +5,7 @@ On-device, high-resolution image synthesis from text and image prompts. ControlNet guides Stable-diffusion with provided input image to generate accurate images from given input prompt. -This is based on the implementation of ControlNet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/controlnet_quantized). 
diff --git a/qai_hub_models/models/controlnet_quantized/requirements.txt b/qai_hub_models/models/controlnet_quantized/requirements.txt index 30d9a5e4..047c5390 100644 --- a/qai_hub_models/models/controlnet_quantized/requirements.txt +++ b/qai_hub_models/models/controlnet_quantized/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -diffusers[torch]==0.21.4 +diffusers[torch]==0.31.0 diff --git a/qai_hub_models/models/convnext_tiny/README.md b/qai_hub_models/models/convnext_tiny/README.md index efee0bb2..a090cc3f 100644 --- a/qai_hub_models/models/convnext_tiny/README.md +++ b/qai_hub_models/models/convnext_tiny/README.md @@ -5,8 +5,7 @@ ConvNextTiny is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ConvNext-Tiny found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/convnext_tiny). 
diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index 2ae45068..3db26753 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 3400.0 - throughput: 294.11764705882354 + inference_time: 3360.0 + throughput: 297.6190476190476 estimated_peak_memory_range: - min: 28672 - max: 2426352 + min: 20480 + max: 2259144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jp8qm28op + job_id: jpedrnd85 job_status: Passed torchscript_onnx_qnn: - inference_time: 3948.0 - throughput: 253.29280648429585 + inference_time: 3919.0 + throughput: 255.16713447307987 estimated_peak_memory_range: min: 618496 - max: 156276608 + max: 197777016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jpedwo8v5 + job_id: j5mnwln7p job_status: Passed torchscript_onnx: - inference_time: 13431.0 - throughput: 74.45461990916536 + inference_time: 13496.0 + throughput: 74.09602845287493 estimated_peak_memory_range: min: 12288 - max: 69359400 + max: 68483448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgn6m0oq5 + job_id: j56y302np job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:35:24Z' + timestamp: 
'2024-11-09T23:22:43Z' - torchscript_onnx_tflite: - inference_time: 3379.0 - throughput: 295.9455460195324 + inference_time: 2459.0 + throughput: 406.669377795852 estimated_peak_memory_range: min: 16384 - max: 2511232 + max: 221864816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jgkeqvdng + job_id: jgz3x0345 job_status: Passed torchscript_onnx_qnn: - inference_time: 3930.0 - throughput: 254.4529262086514 + inference_time: 2835.0 + throughput: 352.7336860670194 estimated_peak_memory_range: - min: 622592 - max: 124613160 + min: 0 + max: 36711488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgz3j28x5 + job_id: jgn69w6j5 job_status: Passed torchscript_onnx: - inference_time: 13355.0 - throughput: 74.87832272557095 + inference_time: 9529.0 + throughput: 104.942806170637 estimated_peak_memory_range: - min: 630784 - max: 2347832 + min: 651264 + max: 389235360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jprv26o7g + job_id: jp3j4rnmg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:35:25Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:22:44Z' - torchscript_onnx_tflite: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2141.0 + throughput: 467.07146193367583 estimated_peak_memory_range: - min: 16384 - max: 218080032 + min: 12288 + max: 64042912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: j5q6r0wop + job_id: j5wedre45 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 3298.0 - throughput: 303.21406913280777 + inference_time: 2437.0 + throughput: 410.3405826836274 estimated_peak_memory_range: - min: 618496 - max: 34326288 + min: 614400 + max: 37316368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: j5we3w8m5 + job_id: jprv47vkg job_status: Passed torchscript_onnx: - inference_time: 9818.0 - throughput: 101.85373803218579 + inference_time: 8676.0 + throughput: 115.26048870447211 estimated_peak_memory_range: min: 0 - max: 387935760 + max: 131064096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp2k9x4qp + job_id: jgo219z1p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:35:26Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:22:45Z' - torchscript_onnx_tflite: - inference_time: 2119.0 - throughput: 471.92071731949034 + inference_time: 3360.0 + throughput: 297.6190476190476 estimated_peak_memory_range: - min: 12288 - max: 64057760 + min: 20480 + max: 2255896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jgjv2doeg + job_id: jg9l3qlmg job_status: Passed torchscript_onnx_qnn: - inference_time: 2505.0 - throughput: 399.2015968063872 + inference_time: 3624.0 + throughput: 275.9381898454746 estimated_peak_memory_range: - min: 614400 - max: 38604208 + min: 626688 + max: 1927168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: j5mn6eo9p - job_status: Passed - torchscript_onnx: - 
inference_time: 8675.0 - throughput: 115.27377521613833 - estimated_peak_memory_range: - min: 651264 - max: 132714768 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 198 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 198 - job_id: jp0z24dn5 + job_id: jp2k7zk6p job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:35:28Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:22:36Z' - torchscript_onnx_tflite: - inference_time: 3366.0 - throughput: 297.08853238265004 + inference_time: 3396.0 + throughput: 294.4640753828033 estimated_peak_memory_range: - min: 860160 - max: 71542744 + min: 20480 + max: 1987976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jglv24om5 + job_id: jp14dm4np job_status: Passed torchscript_onnx_qnn: - inference_time: 3630.0 - throughput: 275.4820936639118 + inference_time: 3703.0 + throughput: 270.0513097488523 estimated_peak_memory_range: - min: 626688 - max: 1818616 + min: 634880 + max: 2321912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jg9ly0k8g + job_id: jp0z1x405 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:35:15Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:22:38Z' - torchscript_onnx_tflite: - inference_time: 3360.0 - throughput: 297.6190476190476 + inference_time: 3361.0 + throughput: 297.53049687592977 estimated_peak_memory_range: - min: 16384 - max: 2282552 + min: 28672 + max: 2389248 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jglv2z9l5 + job_id: jgdxrmx6p job_status: Passed torchscript_onnx_qnn: - inference_time: 3657.0 - throughput: 273.4481815695926 + inference_time: 3673.0 + throughput: 272.2570106180234 estimated_peak_memory_range: - min: 643072 - max: 2226480 + min: 626688 + max: 1852560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgo2n07dp + job_id: jp8q3k2qp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T02:00:02Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:22:39Z' - torchscript_onnx_tflite: - inference_time: 3382.0 - throughput: 295.68302779420463 + inference_time: 3352.0 + throughput: 298.32935560859187 estimated_peak_memory_range: - min: 16384 - max: 2530160 + min: 20480 + max: 20308752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: j56yzj97p + job_id: j57yj8yn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3664.0 - throughput: 272.92576419213975 + inference_time: 3631.0 + throughput: 275.40622418066647 estimated_peak_memory_range: - min: 634880 - max: 2018848 + min: 663552 + max: 2071456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jpv6roym5 + job_id: jgkelkvvg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T02:00:03Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:22:40Z' - torchscript_onnx_tflite: 
- inference_time: 3369.0 - throughput: 296.8239833778569 + inference_time: 10471.0 + throughput: 95.50186228631458 estimated_peak_memory_range: min: 20480 - max: 2686000 + max: 55516880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jp3j13lzg + job_id: jp4lx2l25 job_status: Passed torchscript_onnx_qnn: - inference_time: 3678.0 - throughput: 271.8868950516585 + inference_time: 9481.0 + throughput: 105.47410610695074 estimated_peak_memory_range: - min: 634880 - max: 1794056 + min: 643072 + max: 6473680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgjv2m68g + job_id: j5q67d0ep job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T02:00:04Z' + chipset: SA8295P + timestamp: '2024-11-09T23:22:41Z' - torchscript_onnx_tflite: - inference_time: 9215.0 - throughput: 108.51871947911015 + inference_time: 9193.0 + throughput: 108.77841836179702 estimated_peak_memory_range: - min: 65536 - max: 211649760 + min: 16384 + max: 208926016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jpv6rqer5 + job_id: jpxk7zk85 job_status: Passed torchscript_onnx_qnn: - inference_time: 9848.0 - throughput: 101.54346060113728 + inference_time: 9845.0 + throughput: 101.57440325038091 estimated_peak_memory_range: min: 0 - max: 32175744 + max: 35311952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jpxk693l5 + job_id: jglv0q425 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: 
Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:35:21Z' + timestamp: '2024-11-09T23:22:42Z' - torchscript_onnx_qnn: - inference_time: 3887.0 - throughput: 257.2678157962439 + inference_time: 3898.0 + throughput: 256.5418163160595 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jp14w277p + job_id: jpy14y10p job_status: Passed torchscript_onnx: - inference_time: 16227.0 - throughput: 61.625685585752144 + inference_time: 16269.0 + throughput: 61.46659290675518 estimated_peak_memory_range: - min: 60100608 - max: 60100608 + min: 61018112 + max: 61018112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpy1jzqlp + job_id: jpv61nqz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:35:27Z' + timestamp: '2024-11-09T23:22:46Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md index 613e7dde..d7c7199b 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md @@ -5,8 +5,7 @@ ConvNextTiny is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ConvNext-Tiny-w8a16-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/convnext_tiny_w8a16_quantized). diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 9d67e8b9..e59a396d 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -43,11 +43,11 @@ models: - name: ConvNext-Tiny-w8a16-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 3626.0 - throughput: 275.78599007170436 + inference_time: 3585.0 + throughput: 278.9400278940028 estimated_peak_memory_range: - min: 73728 - max: 20730336 + min: 12288 + max: 19614960 primary_compute_unit: NPU precision: int8 layer_info: @@ -55,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgo2nzmkp + job_id: jp4lx2r25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -64,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:34:29Z' + timestamp: '2024-11-09T23:21:51Z' - torchscript_onnx_qnn: - inference_time: 3611.0 - throughput: 276.93159789531984 + inference_time: 2618.0 + throughput: 381.9709702062643 estimated_peak_memory_range: - min: 12288 - max: 20814568 + min: 0 + max: 35401808 primary_compute_unit: NPU precision: int8 layer_info: @@ -78,22 +78,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpv6rq4r5 + job_id: jpxk7zo85 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:34:30Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:21:52Z' - torchscript_onnx_qnn: - inference_time: 2407.0 - throughput: 415.45492314083924 + inference_time: 2456.0 + throughput: 
407.1661237785016 estimated_peak_memory_range: - min: 311296 - max: 36928976 + min: 0 + max: 34974288 primary_compute_unit: NPU precision: int8 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57yl2695 + job_id: j5mnwlx7p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:34:40Z' + timestamp: '2024-11-09T23:21:53Z' - torchscript_onnx_qnn: - inference_time: 12962.0 - throughput: 77.14858818083628 + inference_time: 13316.0 + throughput: 75.09762691498949 estimated_peak_memory_range: - min: 315392 - max: 8359824 + min: 335872 + max: 8357920 primary_compute_unit: NPU precision: int8 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgjv2d1eg + job_id: jgn69wvj5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:34:32Z' + timestamp: '2024-11-09T23:21:55Z' - torchscript_onnx_qnn: - inference_time: 3181.0 - throughput: 314.36655139893116 + inference_time: 3176.0 + throughput: 314.86146095717885 estimated_peak_memory_range: - min: 335872 - max: 1619152 + min: 339968 + max: 1513832 primary_compute_unit: NPU precision: int8 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpedwo2v5 + job_id: jprv473kg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -156,13 +156,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:34:33Z' + timestamp: '2024-11-09T23:21:56Z' - torchscript_onnx_qnn: - inference_time: 3198.0 - throughput: 312.6954346466542 + inference_time: 3193.0 + throughput: 313.18509238960223 estimated_peak_memory_range: min: 339968 - max: 1613304 + max: 1816752 
primary_compute_unit: NPU precision: int8 layer_info: @@ -170,7 +170,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpy1jwk4p + job_id: jpy14y30p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -179,13 +179,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:59:42Z' + timestamp: '2024-11-09T23:21:58Z' - torchscript_onnx_qnn: - inference_time: 3204.0 - throughput: 312.10986267166044 + inference_time: 3188.0 + throughput: 313.6762860727729 estimated_peak_memory_range: - min: 339968 - max: 1728744 + min: 331776 + max: 1866984 primary_compute_unit: NPU precision: int8 layer_info: @@ -193,7 +193,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp0z2q8e5 + job_id: jp0z1xz05 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -202,13 +202,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:59:43Z' + timestamp: '2024-11-09T23:21:59Z' - torchscript_onnx_qnn: - inference_time: 3201.0 - throughput: 312.40237425804435 + inference_time: 3203.0 + throughput: 312.2073056509522 estimated_peak_memory_range: - min: 339968 - max: 1989208 + min: 348160 + max: 2130552 primary_compute_unit: NPU precision: int8 layer_info: @@ -216,7 +216,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgkeqnwog + job_id: jp8q3kqqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -225,13 +225,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:59:45Z' + timestamp: '2024-11-09T23:22:00Z' - torchscript_onnx_qnn: - inference_time: 4665.0 - throughput: 214.36227224008576 + inference_time: 4760.0 + throughput: 210.08403361344537 estimated_peak_memory_range: - min: 319488 - max: 6253808 + min: 356352 + max: 6223888 primary_compute_unit: NPU precision: int8 layer_info: @@ -239,7 +239,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 
0 total_layers: 215 - job_id: jp8qm9d8p + job_id: jgkelkevg job_status: Passed reference_device_info: name: SA8295P ADP @@ -248,13 +248,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:59:44Z' + timestamp: '2024-11-09T23:22:02Z' - torchscript_onnx_qnn: - inference_time: 4232.0 - throughput: 236.29489603024575 + inference_time: 4253.0 + throughput: 235.12814483893723 estimated_peak_memory_range: min: 315392 - max: 40129040 + max: 40221776 primary_compute_unit: NPU precision: int8 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgdxqn0zp + job_id: j5q67d6ep job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:34:39Z' + timestamp: '2024-11-09T23:22:03Z' - torchscript_onnx_qnn: - inference_time: 3500.0 - throughput: 285.7142857142857 + inference_time: 3543.0 + throughput: 282.2466836014677 estimated_peak_memory_range: - min: 327680 - max: 327680 + min: 311296 + max: 311296 primary_compute_unit: NPU precision: int8 layer_info: @@ -285,7 +285,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgz3j2wx5 + job_id: jp2k7zy6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -294,4 +294,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:34:34Z' + timestamp: '2024-11-09T23:21:57Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md index 7eac8a6d..bb667ecb 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md @@ -5,8 +5,7 @@ ConvNextTiny is a machine learning model that can classify images from the Imagenet dataset. 
It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ConvNext-Tiny-w8a8-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/convnext_tiny_w8a8_quantized). diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index 3e10872a..ed91c7b3 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -18,6 +18,8 @@ aggregated: - Samsung Galaxy S21+ - Snapdragon X Elite CRD - Snapdragon X Plus 8-Core CRD + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -33,6 +35,7 @@ aggregated: - Snapdragon® 888 - Snapdragon® X Elite - Snapdragon® X Plus 8-Core + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -42,35 +45,12 @@ aggregated: models: - name: ConvNext-Tiny-w8a8-Quantized performance_metrics: - - torchscript_onnx_qnn: - inference_time: 1741.0 - throughput: 574.3825387708214 - estimated_peak_memory_range: - min: 20480 - max: 296366856 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: jp8qm03kp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:54:04Z' - torchscript_onnx_qnn: inference_time: 1744.0 throughput: 573.394495412844 estimated_peak_memory_range: - min: 32768 - max: 295441744 + min: 16384 + max: 295345976 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -78,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgkeq7lwg + job_id: jgdxryrlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:54:05Z' + timestamp: '2024-11-09T23:52:59Z' - torchscript_onnx_qnn: - inference_time: 1569.0 - throughput: 637.3486297004462 + inference_time: 1224.0 + throughput: 816.9934640522875 estimated_peak_memory_range: - min: 0 - max: 25623696 + min: 163840 + max: 23151024 primary_compute_unit: NPU precision: int8 layer_info: @@ -101,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5q6re7np + job_id: j57yj1vr5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:54:07Z' + timestamp: '2024-11-09T23:53:01Z' - torchscript_onnx_qnn: - inference_time: 1154.0 - throughput: 866.5511265164645 + inference_time: 1162.0 + throughput: 860.5851979345955 estimated_peak_memory_range: min: 159744 - max: 28377568 + max: 28439792 primary_compute_unit: NPU precision: int8 layer_info: @@ -124,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgz3jovo5 + job_id: jp4lx6jl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -133,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:54:22Z' + timestamp: '2024-11-09T23:53:03Z' - torchscript_onnx_qnn: - inference_time: 6542.0 - throughput: 152.8584530724549 + inference_time: 6690.0 + throughput: 149.47683109118086 estimated_peak_memory_range: - min: 163840 - max: 8255216 + min: 192512 + max: 8241424 primary_compute_unit: NPU precision: int8 layer_info: @@ -147,7 +127,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: 
jglv260j5 + job_id: jpxk78e95 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -156,13 +136,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:54:09Z' + timestamp: '2024-11-09T23:53:05Z' - torchscript_onnx_qnn: - inference_time: 1672.0 - throughput: 598.0861244019138 + inference_time: 1671.0 + throughput: 598.4440454817475 estimated_peak_memory_range: - min: 184320 - max: 1393488 + min: 176128 + max: 1717576 primary_compute_unit: NPU precision: int8 layer_info: @@ -170,7 +150,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j56yze36p + job_id: j5mnw1vqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -179,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:54:11Z' + timestamp: '2024-11-09T23:53:06Z' - torchscript_onnx_qnn: inference_time: 1672.0 throughput: 598.0861244019138 estimated_peak_memory_range: min: 184320 - max: 1925640 + max: 1485840 primary_compute_unit: NPU precision: int8 layer_info: @@ -193,7 +173,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp8qmv2kp + job_id: jprv4m1eg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -202,13 +182,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:09:02Z' + timestamp: '2024-11-09T23:53:11Z' - torchscript_onnx_qnn: - inference_time: 1674.0 - throughput: 597.3715651135007 + inference_time: 1672.0 + throughput: 598.0861244019138 estimated_peak_memory_range: min: 184320 - max: 1569464 + max: 1477152 primary_compute_unit: NPU precision: int8 layer_info: @@ -216,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgkeqmvwg + job_id: jp2k7q3mp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -225,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: 
'2024-10-30T02:09:04Z' + timestamp: '2024-11-09T23:53:13Z' - torchscript_onnx_qnn: - inference_time: 1677.0 - throughput: 596.3029218843172 + inference_time: 1674.0 + throughput: 597.3715651135007 estimated_peak_memory_range: - min: 204800 - max: 1458056 + min: 229376 + max: 1480728 primary_compute_unit: NPU precision: int8 layer_info: @@ -239,7 +219,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jglv2r4j5 + job_id: jpy14kv4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -248,13 +228,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:09:07Z' + timestamp: '2024-11-09T23:53:14Z' - torchscript_onnx_qnn: - inference_time: 2834.0 - throughput: 352.85815102328866 + inference_time: 2847.0 + throughput: 351.24692658939233 estimated_peak_memory_range: - min: 163840 - max: 6018656 + min: 0 + max: 5850752 primary_compute_unit: NPU precision: int8 layer_info: @@ -262,7 +242,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5q6ro0np + job_id: jp0z18ee5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -271,13 +251,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:09:06Z' + timestamp: '2024-11-09T23:53:16Z' - torchscript_onnx_qnn: - inference_time: 2138.0 - throughput: 467.7268475210477 + inference_time: 2136.0 + throughput: 468.1647940074906 estimated_peak_memory_range: - min: 0 - max: 26512976 + min: 163840 + max: 28507712 primary_compute_unit: NPU precision: int8 layer_info: @@ -285,7 +265,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpedwe4o5 + job_id: jp8q3dw8p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -294,13 +274,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:54:20Z' + timestamp: '2024-11-09T23:53:18Z' - torchscript_onnx_qnn: - inference_time: 1876.0 - throughput: 
533.0490405117271 + inference_time: 1859.0 + throughput: 537.9236148466917 estimated_peak_memory_range: - min: 544768 - max: 544768 + min: 438272 + max: 438272 primary_compute_unit: NPU precision: int8 layer_info: @@ -308,7 +288,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp3j1v43g + job_id: jgn69drm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -317,4 +297,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:54:12Z' + timestamp: '2024-11-09T23:53:09Z' diff --git a/qai_hub_models/models/ddrnet23_slim/README.md b/qai_hub_models/models/ddrnet23_slim/README.md index f4c0a382..26acb767 100644 --- a/qai_hub_models/models/ddrnet23_slim/README.md +++ b/qai_hub_models/models/ddrnet23_slim/README.md @@ -5,8 +5,7 @@ DDRNet23Slim is a machine learning model that segments an image into semantic classes, specifically designed for road-based scenes. It is designed for the application of self-driving cars. -This is based on the implementation of DDRNet23-Slim found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ddrnet23_slim). 
diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 2592b1f0..89f12022 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index c9bbb0bd..a66b3a58 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 5081.0 - throughput: 196.811651249754 + inference_time: 5131.0 + throughput: 194.89378288832586 estimated_peak_memory_range: - min: 1011712 - max: 3494392 + min: 2121728 + max: 4673168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jglv24em5 + job_id: jp14d1xnp job_status: Passed torchscript_onnx: - inference_time: 7348.0 - throughput: 136.09145345672292 + inference_time: 7388.0 + throughput: 135.3546291283162 estimated_peak_memory_range: - min: 9850880 - max: 15613624 + min: 9936896 + max: 14724368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jpy1jzvlp + job_id: jgjv08n1g job_status: Passed reference_device_info: name: Samsung 
Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:33:46Z' + timestamp: '2024-11-09T23:21:06Z' - torchscript_onnx_tflite: - inference_time: 5218.0 - throughput: 191.64430816404752 + inference_time: 3489.0 + throughput: 286.61507595299514 estimated_peak_memory_range: - min: 1015808 - max: 3618056 + min: 991232 + max: 80564912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j56yz2qyp + job_id: jgdxr4l6p job_status: Passed torchscript_onnx: - inference_time: 7343.0 - throughput: 136.1841209314994 + inference_time: 4952.0 + throughput: 201.93861066235866 estimated_peak_memory_range: - min: 9871360 - max: 95733424 + min: 11870208 + max: 106303120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,45 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jp0z24en5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:33:47Z' - - torchscript_onnx_tflite: - inference_time: 4534.0 - throughput: 220.55580061755623 - estimated_peak_memory_range: - min: 1003520 - max: 80198816 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 131 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 131 - job_id: jp3j1nqng - job_status: Passed - torchscript_onnx: - inference_time: 5628.0 - throughput: 177.68301350390902 - estimated_peak_memory_range: - min: 503808 - max: 94007264 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 155 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 155 - job_id: jp8qm2wop + job_id: jpedrnm85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:33:48Z' + timestamp: '2024-11-09T23:21:07Z' - torchscript_onnx_tflite: - inference_time: 3470.0 - throughput: 288.1844380403458 + inference_time: 3453.0 + throughput: 289.6032435563278 estimated_peak_memory_range: min: 8192 - max: 40718224 + max: 40454400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j5we3wmm5 + job_id: j57yjn3n5 job_status: Passed torchscript_onnx: - inference_time: 4122.0 - throughput: 242.600679281902 + inference_time: 4990.0 + throughput: 200.40080160320642 estimated_peak_memory_range: - min: 11804672 - max: 58182160 + min: 11862016 + max: 58183344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: j5q6r09op + job_id: jgz3x0d45 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:33:50Z' + timestamp: '2024-11-09T23:21:08Z' - torchscript_onnx_tflite: - inference_time: 5126.0 - throughput: 195.08388607101054 + inference_time: 5068.0 + throughput: 197.3164956590371 estimated_peak_memory_range: - min: 999424 - max: 2725816 + min: 245760 + max: 1795264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgo2nzekp + job_id: jp4lx4025 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:33:29Z' + timestamp: '2024-11-09T23:20:48Z' - torchscript_onnx_tflite: - inference_time: 5140.0 - throughput: 194.55252918287937 + inference_time: 5131.0 + throughput: 194.89378288832586 estimated_peak_memory_range: - min: 1024000 - max: 2836808 + min: 1040384 + max: 
2443512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgdxq9mlp + job_id: jpxk7r285 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:59:02Z' + timestamp: '2024-11-09T23:20:49Z' - torchscript_onnx_tflite: - inference_time: 5039.0 - throughput: 198.45207382417146 + inference_time: 5185.0 + throughput: 192.86403085824494 estimated_peak_memory_range: - min: 1011712 - max: 3384552 + min: 16384 + max: 1978608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j57ylw1r5 + job_id: j5mnwky7p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:59:03Z' + timestamp: '2024-11-09T23:20:50Z' - torchscript_onnx_tflite: - inference_time: 5152.0 - throughput: 194.09937888198758 + inference_time: 5131.0 + throughput: 194.89378288832586 estimated_peak_memory_range: - min: 1015808 - max: 3615464 + min: 61440 + max: 2624352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jp4ldo6l5 + job_id: jgn69q8j5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:59:04Z' + timestamp: '2024-11-09T23:20:51Z' + - torchscript_onnx_tflite: + inference_time: 8857.0 + throughput: 112.90504685559445 + estimated_peak_memory_range: + min: 1007616 + max: 32252848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 131 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 131 + job_id: jprv4djkg + job_status: 
Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:20:52Z' - torchscript_onnx_tflite: - inference_time: 7542.0 - throughput: 132.59082471492974 + inference_time: 7549.0 + throughput: 132.46787653993906 estimated_peak_memory_range: - min: 6037504 - max: 75046352 + min: 1789952 + max: 68303744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgz3j2vx5 + job_id: jp2k7dn6p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:33:33Z' + timestamp: '2024-11-09T23:20:53Z' - torchscript_onnx: - inference_time: 8309.0 - throughput: 120.35142616440005 + inference_time: 8362.0 + throughput: 119.58861516383641 estimated_peak_memory_range: - min: 9863168 - max: 9863168 + min: 9859072 + max: 9859072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jgkeqvrng + job_id: j5wedr645 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:33:49Z' + timestamp: '2024-11-09T23:21:09Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md index ed83d022..83612f64 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md @@ -5,8 +5,7 @@ DeepLabV3 is designed for semantic segmentation at multiple scales, trained on the various datasets. It uses MobileNet as a backbone. -This is based on the implementation of DeepLabV3-Plus-MobileNet found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet). diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/evaluate.py b/qai_hub_models/models/deeplabv3_plus_mobilenet/evaluate.py new file mode 100644 index 00000000..53c08e2b --- /dev/null +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.deeplabv3_plus_mobilenet import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["pascal_voc"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=400, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + 
args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index 875635f6..ae64fb4f 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,56 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DeepLabV3-Plus-MobileNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 13309.0 - throughput: 75.1371252535878 + inference_time: 13629.0 + throughput: 73.37295472888694 estimated_peak_memory_range: - min: 22147072 - max: 24387440 + min: 286720 + max: 5166968 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: jglv24kj5 + total_layers: 101 + job_id: jp14d1l7p job_status: Passed torchscript_onnx_qnn: - inference_time: 13097.0 - throughput: 76.35336336565626 + inference_time: 13941.0 + throughput: 71.7308657915501 estimated_peak_memory_range: - min: 3198976 - max: 21881216 + min: 3297280 + max: 24370232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jg9ly03wg + total_layers: 126 + job_id: j5mnwk27p job_status: Passed torchscript_onnx: - inference_time: 17556.0 - throughput: 56.96058327637275 + inference_time: 11976.0 + throughput: 83.500334001336 estimated_peak_memory_range: - min: 47452160 - max: 49504920 + min: 32768 + max: 12914520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - 
total_layers: 126 - job_id: j5mn6ew9p + total_layers: 127 + job_id: j56y3mlnp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,149 +94,134 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:33:03Z' + timestamp: '2024-11-09T23:20:21Z' - torchscript_onnx_tflite: - inference_time: 13236.0 - throughput: 75.55152614082805 + inference_time: 9523.0 + throughput: 105.00892575868949 estimated_peak_memory_range: - min: 21245952 - max: 23976528 + min: 315392 + max: 91759424 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: j56yz216p + total_layers: 101 + job_id: jgdxr49zp job_status: Passed torchscript_onnx_qnn: - inference_time: 13002.0 - throughput: 76.91124442393478 + inference_time: 9686.0 + throughput: 103.24179227751394 estimated_peak_memory_range: - min: 6303744 - max: 25066256 + min: 3272704 + max: 37069472 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jp14w2d8p + total_layers: 126 + job_id: jgn69qyj5 job_status: Passed torchscript_onnx: - inference_time: 17190.0 - throughput: 58.17335660267597 + inference_time: 8268.0 + throughput: 120.94823415578132 estimated_peak_memory_range: - min: 44822528 - max: 58663320 + min: 3825664 + max: 96181824 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: jgn6m09q5 + total_layers: 127 + job_id: jp3j472mg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:33:04Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:20:22Z' - 
torchscript_onnx_tflite: - inference_time: 11717.0 - throughput: 85.34607834769993 + inference_time: 9410.0 + throughput: 106.26992561105207 estimated_peak_memory_range: - min: 21209088 - max: 101335680 + min: 303104 + max: 46949632 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: jp3j1nm3g + total_layers: 101 + job_id: j5wedzv45 job_status: Passed torchscript_onnx_qnn: - inference_time: 10741.0 - throughput: 93.10120100549297 + inference_time: 9636.0 + throughput: 103.77750103777501 estimated_peak_memory_range: - min: 3174400 - max: 25355008 + min: 3256320 + max: 33740112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jgdxqnrrp + total_layers: 126 + job_id: jprv4dqkg job_status: Passed torchscript_onnx: - inference_time: 15471.0 - throughput: 64.6370628918622 + inference_time: 6918.0 + throughput: 144.55044810638913 estimated_peak_memory_range: - min: 872448 - max: 85529136 + min: 0 + max: 48274640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: jprv2647g + total_layers: 127 + job_id: jgo21wq1p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:33:05Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:20:23Z' - torchscript_onnx_tflite: - inference_time: 7834.0 - throughput: 127.64871074802144 + inference_time: 13641.0 + throughput: 73.30840847445202 estimated_peak_memory_range: - min: 20840448 - max: 59861136 + min: 0 + max: 2094688 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + 
layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: j5we3wd35 + total_layers: 101 + job_id: jg9l321mg job_status: Passed torchscript_onnx_qnn: - inference_time: 9242.0 - throughput: 108.20168794633196 - estimated_peak_memory_range: - min: 3178496 - max: 27988384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 124 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 124 - job_id: jpxk697l5 - job_status: Passed - torchscript_onnx: - inference_time: 11916.0 - throughput: 83.92077878482712 + inference_time: 12185.0 + throughput: 82.06811653672548 estimated_peak_memory_range: - min: 50262016 - max: 91039824 + min: 3289088 + max: 4550536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,197 +229,197 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpy1jz4lp + job_id: jp2k7d66p job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:33:07Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:20:14Z' - torchscript_onnx_tflite: - inference_time: 13055.0 - throughput: 76.59900421294523 + inference_time: 13610.0 + throughput: 73.47538574577517 estimated_peak_memory_range: - min: 22122496 - max: 26914480 + min: 294912 + max: 2594440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: jgo2nzvqp + total_layers: 101 + job_id: jp14d1lnp job_status: Passed torchscript_onnx_qnn: - inference_time: 12061.0 - throughput: 82.91186468783683 + inference_time: 12319.0 + throughput: 81.17542008279892 estimated_peak_memory_range: - min: 3186688 - max: 4817736 + min: 3289088 + max: 4663968 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: j5we3wdm5 + total_layers: 126 + job_id: jp0z19705 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:32:55Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:20:16Z' - torchscript_onnx_tflite: - inference_time: 13250.0 - throughput: 75.47169811320755 + inference_time: 13676.0 + throughput: 73.12079555425564 estimated_peak_memory_range: - min: 22528000 - max: 24678520 + min: 315392 + max: 2708832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: j56yzj07p + total_layers: 101 + job_id: jgdxr496p job_status: Passed torchscript_onnx_qnn: - inference_time: 12275.0 - throughput: 81.46639511201629 + inference_time: 12327.0 + throughput: 81.12273870365864 estimated_peak_memory_range: - min: 3219456 - max: 4887496 + min: 3497984 + max: 5081784 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jpv6ronm5 + total_layers: 126 + job_id: jp8q3rvqp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:58:43Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:20:17Z' - torchscript_onnx_tflite: - inference_time: 13213.0 - throughput: 75.68303943086354 + inference_time: 13699.0 + throughput: 72.99802905321556 estimated_peak_memory_range: - min: 21020672 - max: 23190496 + min: 716800 + max: 2597248 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: 
jp3j13rzg + total_layers: 101 + job_id: j57yjnwn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12084.0 - throughput: 82.75405494869248 + inference_time: 12312.0 + throughput: 81.22157244964262 estimated_peak_memory_range: - min: 3207168 - max: 4750208 + min: 4722688 + max: 6091848 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jgjv2m88g + total_layers: 126 + job_id: jgkel0mvg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:58:45Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:20:18Z' - torchscript_onnx_tflite: - inference_time: 13242.0 - throughput: 75.51729346020238 + inference_time: 22728.0 + throughput: 43.99859204505456 estimated_peak_memory_range: - min: 22171648 - max: 24482816 + min: 303104 + max: 46306112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: jgo2n09dp + total_layers: 101 + job_id: jp4lx4o25 job_status: Passed torchscript_onnx_qnn: - inference_time: 12076.0 - throughput: 82.80887711162637 + inference_time: 20245.0 + throughput: 49.39491232403063 estimated_peak_memory_range: - min: 3239936 - max: 4663848 + min: 49152 + max: 5985296 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jpedw1n05 + total_layers: 126 + job_id: j5q671oep job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:58:46Z' + chipset: SA8295P + timestamp: '2024-11-09T23:20:19Z' - torchscript_onnx_tflite: - 
inference_time: 18387.0 - throughput: 54.38625115570784 + inference_time: 19620.0 + throughput: 50.9683995922528 estimated_peak_memory_range: - min: 14786560 - max: 94875776 + min: 319488 + max: 89606832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 98 + layers_on_npu: 101 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 98 - job_id: jgz3j2xo5 + total_layers: 101 + job_id: jpxk7rj85 job_status: Passed torchscript_onnx_qnn: - inference_time: 18680.0 - throughput: 53.53319057815846 + inference_time: 20727.0 + throughput: 48.24624885415159 estimated_peak_memory_range: - min: 3174400 - max: 30332784 + min: 3276800 + max: 38171088 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jp4ldnx15 + total_layers: 126 + job_id: jglv08r25 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,36 +428,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:33:01Z' + timestamp: '2024-11-09T23:20:20Z' - torchscript_onnx_qnn: - inference_time: 12402.0 - throughput: 80.63215610385421 + inference_time: 13002.0 + throughput: 76.91124442393478 estimated_peak_memory_range: - min: 3170304 - max: 3170304 + min: 3256320 + max: 3256320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 124 + layers_on_npu: 126 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 124 - job_id: jg9ly038g + total_layers: 126 + job_id: jpy142w0p job_status: Passed torchscript_onnx: - inference_time: 16679.0 - throughput: 59.95563283170454 + inference_time: 13568.0 + throughput: 73.70283018867924 estimated_peak_memory_range: - min: 69480448 - max: 69480448 + min: 12636160 + max: 12636160 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: jp2k9x7qp + total_layers: 127 + job_id: jpv61mxz5 
job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:33:06Z' + timestamp: '2024-11-09T23:20:24Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md index f8503741..5d888797 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md @@ -5,8 +5,7 @@ DeepLabV3 Quantized is designed for semantic segmentation at multiple scales, trained on various datasets. It uses MobileNet as a backbone. -This is based on the implementation of DeepLabV3-Plus-MobileNet-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/deeplabv3_plus_mobilenet_quantized). diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/conftest.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/conftest.py index 67a1c167..24585b6c 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/conftest.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.deeplabv3_plus_mobilenet_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. 
@@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py new file mode 100644 index 00000000..ae04db87 --- /dev/null +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.deeplabv3_plus_mobilenet_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["pascal_voc"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=400, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + 
args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index f0b20d8a..368c8e3d 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.deeplabv3_plus_mobilenet_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ can_access_qualcomm_ai_hub, export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. 
Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. @@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "deeplabv3_plus_mobilenet_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. 
+ onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "pascal_voc", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. 
Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py index 3bcde6cd..2e8161da 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/model.py @@ -4,80 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, -) - -# isort: on - -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - from qai_hub_models.models.deeplabv3_plus_mobilenet.model import DeepLabV3PlusMobilenet -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.quantization_aimet import ( - constrain_quantized_inputs_to_image_range, - tie_observers, -) +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 4 -DEFAULT_ENCODINGS = "deeplabv3_plus_mobilenet_quantized_encodings.json" - - -class DeepLabV3PlusMobilenetQuantizable(AIMETQuantizableMixin, DeepLabV3PlusMobilenet): - """ - DeepLabV3PlusMobileNet with post train quantization support. 
- - Supports only 8 bit weights and activations - """ - - def __init__( - self, - deeplabv3_model: QuantizationSimModel, - ) -> None: - DeepLabV3PlusMobilenet.__init__( - self, deeplabv3_model.model, normalize_input=False - ) - AIMETQuantizableMixin.__init__(self, deeplabv3_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - normalize_input: bool = True, - ) -> DeepLabV3PlusMobilenetQuantizable: - # Load Model - fp16_model = DeepLabV3PlusMobilenet.from_pretrained( - normalize_input=normalize_input - ) - input_shape = cls.get_input_spec()["image"][0] - - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - tie_observers(sim) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - final_model = cls(sim) - return final_model +class DeepLabV3PlusMobilenetQuantizable(HubQuantizableMixin, DeepLabV3PlusMobilenet): + pass diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index 0201a60f..f7137491 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -49,102 +49,34 @@ models: - name: DeepLabV3-Plus-MobileNet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 3369.0 - throughput: 296.8239833778569 + inference_time: 4164.0 + throughput: 240.15369836695484 estimated_peak_memory_range: - min: 16384 - max: 1733504 + min: 307200 + max: 9450592 primary_compute_unit: NPU precision: 
int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jpxk69y35 + total_layers: 136 + job_id: jgkelwyyg job_status: Passed torchscript_onnx_qnn: - inference_time: 5220.0 - throughput: 191.57088122605364 + inference_time: 4776.0 + throughput: 209.38023450586266 estimated_peak_memory_range: - min: 12288 - max: 12508632 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: jp3j1ne3g - job_status: Passed - torchscript_onnx: - inference_time: 4198.0 - throughput: 238.20867079561697 - estimated_peak_memory_range: - min: 12427264 - max: 21016976 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 103 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 103 - job_id: jpxk69135 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:32:18Z' - - torchscript_onnx_tflite: - inference_time: 3333.0 - throughput: 300.03000300030004 - estimated_peak_memory_range: - min: 16384 - max: 9856256 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 104 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 104 - job_id: j5mn6e3dp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 5207.0 - throughput: 192.04916458613405 - estimated_peak_memory_range: - min: 20480 - max: 12081984 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: jgo2nz3qp - job_status: Passed - torchscript_onnx: - inference_time: 4232.0 - throughput: 236.29489603024575 - estimated_peak_memory_range: - min: 11120640 - max: 19627008 + min: 32768 + max: 10857136 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 103 + layers_on_npu: 127 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 103 - job_id: j5mn6ezdp + total_layers: 127 + job_id: jg9l374qg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,51 +85,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:32:19Z' + timestamp: '2024-11-09T23:52:05Z' - torchscript_onnx_tflite: - inference_time: 2463.0 - throughput: 406.00893219650834 + inference_time: 2994.0 + throughput: 334.001336005344 estimated_peak_memory_range: min: 12288 - max: 67627776 + max: 78918048 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgn6m03k5 + total_layers: 136 + job_id: j5q67x27p job_status: Passed torchscript_onnx_qnn: - inference_time: 3832.0 - throughput: 260.96033402922757 - estimated_peak_memory_range: - min: 802816 - max: 29696624 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: jpv6rqwk5 - job_status: Passed - torchscript_onnx: - inference_time: 3682.0 - throughput: 271.59152634437805 + inference_time: 3442.0 + throughput: 290.5287623474724 estimated_peak_memory_range: - min: 12333056 - max: 86692688 + min: 827392 + max: 37764544 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 103 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 103 - job_id: jgn6m0ek5 + total_layers: 127 + job_id: jp14dk8kp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,51 +123,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:32:20Z' + timestamp: '2024-11-09T23:52:07Z' - torchscript_onnx_tflite: - inference_time: 2631.0 - throughput: 380.08361839604714 + inference_time: 2801.0 + throughput: 357.0153516601214 estimated_peak_memory_range: min: 8192 - max: 43452656 + max: 53302144 
primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jglv243j5 + total_layers: 136 + job_id: jglv09ke5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3834.0 - throughput: 260.8242044861763 + inference_time: 3450.0 + throughput: 289.8550724637681 estimated_peak_memory_range: - min: 798720 - max: 27807296 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: j57yl2dv5 - job_status: Passed - torchscript_onnx: - inference_time: 2972.0 - throughput: 336.47375504710635 - estimated_peak_memory_range: - min: 14446592 - max: 63802128 + min: 823296 + max: 32214688 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 103 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 103 - job_id: jp2k9xmrp + total_layers: 127 + job_id: jgdxryvkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,36 +161,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:32:22Z' + timestamp: '2024-11-09T23:52:09Z' - torchscript_onnx_tflite: - inference_time: 14140.0 - throughput: 70.72135785007072 + inference_time: 18129.0 + throughput: 55.16024049864858 estimated_peak_memory_range: - min: 5771264 - max: 48802992 + min: 327680 + max: 64840736 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jprv26e0g + total_layers: 136 + job_id: j56y391vp job_status: Passed torchscript_onnx_qnn: - inference_time: 17959.0 - throughput: 55.682387660782894 + inference_time: 19952.0 + throughput: 50.12028869286287 estimated_peak_memory_range: - min: 856064 - max: 8852624 + min: 1118208 + max: 9182576 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + 
layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jgjv2dlvg + total_layers: 127 + job_id: j57yj1dq5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,21 +199,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:32:07Z' + timestamp: '2024-11-09T23:51:45Z' - torchscript_onnx_tflite: - inference_time: 121373.0 - throughput: 8.2390647013751 + inference_time: 162562.0 + throughput: 6.151499120335626 estimated_peak_memory_range: - min: 11452416 - max: 32775632 + min: 4001792 + max: 10624192 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 101 - layers_on_gpu: 3 + layers_on_npu: 136 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp2k9xlrp + total_layers: 136 + job_id: jp3j4lmxg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,36 +222,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:31:55Z' + timestamp: '2024-11-09T23:51:24Z' - torchscript_onnx_tflite: - inference_time: 3272.0 - throughput: 305.6234718826406 + inference_time: 4143.0 + throughput: 241.3709872073377 estimated_peak_memory_range: - min: 12288 - max: 1395744 + min: 311296 + max: 2679784 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jpy1jz68p + total_layers: 136 + job_id: jgo217v4p job_status: Passed torchscript_onnx_qnn: - inference_time: 3972.0 - throughput: 251.76233635448136 + inference_time: 3881.0 + throughput: 257.6655501159495 estimated_peak_memory_range: - min: 843776 - max: 2142024 + min: 847872 + max: 2283144 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jpedwovo5 + total_layers: 127 + job_id: jp4lx6wq5 job_status: Passed reference_device_info: name: 
QCS8550 (Proxy) @@ -358,36 +260,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:32:08Z' + timestamp: '2024-11-09T23:51:48Z' - torchscript_onnx_tflite: - inference_time: 3315.0 - throughput: 301.65912518853696 + inference_time: 4197.0 + throughput: 238.2654276864427 estimated_peak_memory_range: - min: 12288 - max: 1565720 + min: 339968 + max: 2094112 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j57ylw8r5 + total_layers: 136 + job_id: jpv61yw75 job_status: Passed torchscript_onnx_qnn: - inference_time: 3975.0 - throughput: 251.57232704402514 + inference_time: 3939.0 + throughput: 253.87154100025387 estimated_peak_memory_range: - min: 823296 - max: 2256424 + min: 839680 + max: 2125696 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jgn6mywm5 + total_layers: 127 + job_id: j5mnw1zyp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,36 +298,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:58:18Z' + timestamp: '2024-11-09T23:51:53Z' - torchscript_onnx_tflite: - inference_time: 3299.0 - throughput: 303.12215822976657 + inference_time: 4185.0 + throughput: 238.94862604540023 estimated_peak_memory_range: - min: 16384 - max: 1744456 + min: 311296 + max: 3046864 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp4ldo2l5 + total_layers: 136 + job_id: jgjv06l7g job_status: Passed torchscript_onnx_qnn: - inference_time: 3983.0 - throughput: 251.06703489831784 + inference_time: 3940.0 + throughput: 253.80710659898477 estimated_peak_memory_range: - min: 749568 - max: 2114040 + min: 24576 + max: 1521040 
primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jprv2q7eg + total_layers: 127 + job_id: jgn69d9v5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,36 +336,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:58:19Z' + timestamp: '2024-11-09T23:51:55Z' - torchscript_onnx_tflite: - inference_time: 3336.0 - throughput: 299.7601918465228 + inference_time: 4175.0 + throughput: 239.52095808383234 estimated_peak_memory_range: - min: 20480 - max: 1551264 + min: 49152 + max: 2549720 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j5mn62lqp + total_layers: 136 + job_id: jpedr0v75 job_status: Passed torchscript_onnx_qnn: - inference_time: 3986.0 - throughput: 250.87807325639739 + inference_time: 3932.0 + throughput: 254.323499491353 estimated_peak_memory_range: - min: 827392 - max: 2067296 + min: 4407296 + max: 5768608 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jp0z2qxe5 + total_layers: 127 + job_id: jprv4m4vg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,36 +374,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:58:21Z' + timestamp: '2024-11-09T23:51:57Z' - torchscript_onnx_tflite: - inference_time: 5839.0 - throughput: 171.26220243192327 + inference_time: 6632.0 + throughput: 150.78407720144753 estimated_peak_memory_range: - min: 5558272 - max: 48226432 + min: 303104 + max: 52132064 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jpxk6jz95 + total_layers: 136 + job_id: jgz3xq7z5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 6643.0 - throughput: 150.5343971097396 + inference_time: 7203.0 + throughput: 138.83104262113008 estimated_peak_memory_range: - min: 839680 - max: 6815792 + min: 856064 + max: 6713168 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jp2k96zmp + total_layers: 127 + job_id: jp2k7q7xp job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,36 +412,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:58:20Z' + timestamp: '2024-11-09T23:52:00Z' - torchscript_onnx_tflite: - inference_time: 4080.0 - throughput: 245.09803921568627 + inference_time: 4841.0 + throughput: 206.5688907250568 estimated_peak_memory_range: - min: 12288 - max: 67893216 + min: 319488 + max: 80508576 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 136 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j5q6r03np + total_layers: 136 + job_id: j5wed09z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5403.0 - throughput: 185.08236165093467 + inference_time: 5666.0 + throughput: 176.49135192375573 estimated_peak_memory_range: - min: 802816 - max: 35129520 + min: 827392 + max: 35306144 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 142 - job_id: jgdxqnvrp + total_layers: 127 + job_id: jpy14k4rp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,36 +450,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:32:14Z' + timestamp: '2024-11-09T23:52:03Z' - torchscript_onnx_qnn: - inference_time: 4342.0 - throughput: 230.30861354214647 + inference_time: 4267.0 + throughput: 234.35669088352472 estimated_peak_memory_range: min: 815104 max: 815104 
primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: jgz3j27o5 - job_status: Passed - torchscript_onnx: - inference_time: 4668.0 - throughput: 214.22450728363324 - estimated_peak_memory_range: - min: 18251776 - max: 18251776 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 103 + layers_on_npu: 127 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 103 - job_id: jprv26y0g + total_layers: 127 + job_id: jpxk781j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:32:21Z' + timestamp: '2024-11-09T23:52:12Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/test.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/test.py deleted file mode 100644 index 999eebf7..00000000 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/test.py +++ /dev/null @@ -1,66 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
-# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -import os -import zipfile - -import torch - -from qai_hub_models.models._shared.deeplab.app import DeepLabV3App -from qai_hub_models.models._shared.deeplab.model import NUM_CLASSES -from qai_hub_models.models.deeplabv3_plus_mobilenet.test import INPUT_IMAGE_ADDRESS -from qai_hub_models.models.deeplabv3_plus_mobilenet_quantized.demo import ( - main as demo_main, -) -from qai_hub_models.models.deeplabv3_plus_mobilenet_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - DeepLabV3PlusMobilenetQuantizable, -) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - load_numpy, - qaihm_temp_dir, -) -from qai_hub_models.utils.testing import skip_clone_repo_check - -OUTPUT_IMAGE_MASK = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "deeplab_output_mask.npy" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. 
- torch.manual_seed(0) - image = load_image(INPUT_IMAGE_ADDRESS) - app = DeepLabV3App( - DeepLabV3PlusMobilenetQuantizable.from_pretrained(), num_classes=NUM_CLASSES - ) - output_mask = app.predict(image, True) - output_mask_gt = load_numpy(OUTPUT_IMAGE_MASK) - assert (output_mask == output_mask_gt).mean() > 0.95 - - -@skip_clone_repo_check -def test_aimet_export(): - model = DeepLabV3PlusMobilenetQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/deeplabv3_resnet50/README.md b/qai_hub_models/models/deeplabv3_resnet50/README.md index e57f8d8a..7852ddd8 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/README.md +++ b/qai_hub_models/models/deeplabv3_resnet50/README.md @@ -5,8 +5,7 @@ DeepLabV3 is designed for semantic segmentation at multiple scales, trained on the COCO dataset. It uses ResNet50 as a backbone. -This is based on the implementation of DeepLabV3-ResNet50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/deeplabv3_resnet50). 
diff --git a/qai_hub_models/models/deeplabv3_resnet50/evaluate.py b/qai_hub_models/models/deeplabv3_resnet50/evaluate.py new file mode 100644 index 00000000..248db83b --- /dev/null +++ b/qai_hub_models/models/deeplabv3_resnet50/evaluate.py @@ -0,0 +1,56 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.deeplabv3_resnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["pascal_voc"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=400, + supported_datasets=SUPPORTED_DATASETS, + supports_qnn=False, + supports_onnx=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index c8c9c5fb..264eaf75 100644 --- 
a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -205,12 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, - supports_qnn=False, - supports_onnx=False, - supports_precompiled_qnn_onnx=False, - ) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/model.py b/qai_hub_models/models/deeplabv3_resnet50/model.py index d43dff14..f688640d 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/model.py +++ b/qai_hub_models/models/deeplabv3_resnet50/model.py @@ -56,6 +56,3 @@ def get_hub_profile_options( ): profile_options = profile_options + " --compute_unit gpu" return profile_options - - def forward(self, image): - return super().forward(image)["out"] diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index 7c234ef2..140dc369 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -16,10 +16,13 @@ aggregated: - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -28,28 +31,31 @@ aggregated: - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 1 - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 293137.0 - throughput: 3.4113742038705452 + inference_time: 294937.0 + throughput: 3.3905545930147793 estimated_peak_memory_range: - min: 69632 - max: 149329616 + 
min: 12288 + max: 353957032 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jprv2620g + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: j57yjnz95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -58,44 +64,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:30:58Z' + timestamp: '2024-11-09T23:19:22Z' - torchscript_onnx_tflite: - inference_time: 293265.0 - throughput: 3.40988525736109 + inference_time: 206139.0 + throughput: 4.851095619945765 estimated_peak_memory_range: - min: 1101824 - max: 149550400 + min: 22056960 + max: 49467600 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jp2k9x9rp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:30:59Z' - - torchscript_onnx_tflite: - inference_time: 225190.0 - throughput: 4.440694524623651 - estimated_peak_memory_range: - min: 22372352 - max: 42775344 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jpy1jzj8p + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jp4lx4q15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -104,21 +87,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:31:00Z' + timestamp: '2024-11-09T23:19:23Z' - torchscript_onnx_tflite: - inference_time: 218372.0 - throughput: 4.579341673840968 + inference_time: 222046.0 + throughput: 4.503571332066328 estimated_peak_memory_range: - min: 21069824 - max: 37316880 + min: 23195648 + max: 43731584 primary_compute_unit: GPU precision: fp16 
layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: j56yz2z6p + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jpxk7rvl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -127,21 +110,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:31:07Z' + timestamp: '2024-11-09T23:19:24Z' - torchscript_onnx_tflite: - inference_time: 289533.0 - throughput: 3.453837731795685 + inference_time: 292298.0 + throughput: 3.4211660702433817 estimated_peak_memory_range: - min: 0 - max: 148008864 + min: 32768 + max: 281180904 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jp0z24295 + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: j5mnwkr9p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -150,21 +133,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:31:01Z' + timestamp: '2024-11-09T23:19:25Z' - torchscript_onnx_tflite: - inference_time: 290277.0 - throughput: 3.444985307137665 + inference_time: 296485.0 + throughput: 3.3728519149366747 estimated_peak_memory_range: - min: 49152 - max: 148054824 + min: 2150400 + max: 140605448 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: j5we3vzj5 + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jpv61jlj5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -173,21 +156,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:57:42Z' + timestamp: '2024-11-11T13:55:08Z' - torchscript_onnx_tflite: - inference_time: 289686.0 - throughput: 3.4520135595092616 + inference_time: 292364.0 + throughput: 3.4203937557291595 estimated_peak_memory_range: - min: 
22200320 - max: 200195744 + min: 20480 + max: 133789552 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jg9ly12vg + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jprv4dk7g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -196,21 +179,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:57:43Z' + timestamp: '2024-11-09T23:19:27Z' - torchscript_onnx_tflite: - inference_time: 292689.0 - throughput: 3.416595772304391 + inference_time: 295452.0 + throughput: 3.384644544629923 estimated_peak_memory_range: - min: 61440 - max: 148542216 + min: 36864 + max: 171508400 primary_compute_unit: GPU precision: fp16 layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jp14wl1lp + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jp2k7d8qp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -219,21 +202,44 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:57:44Z' + timestamp: '2024-11-09T23:19:28Z' + - torchscript_onnx_tflite: + inference_time: 279707.0 + throughput: 3.5751697311829878 + estimated_peak_memory_range: + min: 6520832 + max: 27783856 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jpy142elp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:19:29Z' - torchscript_onnx_tflite: - inference_time: 393817.0 - throughput: 2.5392504640480222 + inference_time: 467317.0 + throughput: 2.1398750740931742 estimated_peak_memory_range: - min: 21614592 - max: 47943456 + min: 23961600 + max: 53378288 primary_compute_unit: GPU precision: fp16 
layer_info: layers_on_npu: 0 - layers_on_gpu: 95 - layers_on_cpu: 0 - total_layers: 95 - job_id: jglv242j5 + layers_on_gpu: 98 + layers_on_cpu: 2 + total_layers: 100 + job_id: jp0z19yn5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -242,4 +248,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:31:06Z' + timestamp: '2024-11-09T23:19:30Z' diff --git a/qai_hub_models/models/densenet121/README.md b/qai_hub_models/models/densenet121/README.md index a291d310..dd2c1346 100644 --- a/qai_hub_models/models/densenet121/README.md +++ b/qai_hub_models/models/densenet121/README.md @@ -5,8 +5,7 @@ Densenet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of DenseNet-121 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/densenet121). 
diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 508d0880..c237d2c9 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,6 +36,7 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: @@ -44,61 +46,8 @@ models: inference_time: 1927.0 throughput: 518.9413596263622 estimated_peak_memory_range: - min: 20480 - max: 8144360 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 312 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 312 - job_id: j57yl20v5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1995.0 - throughput: 501.2531328320802 - estimated_peak_memory_range: - min: 16384 - max: 28560800 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 372 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 372 - job_id: jp8qm20kp - job_status: Passed - torchscript_onnx: - inference_time: 1868.0 - throughput: 535.3319057815846 - estimated_peak_memory_range: - min: 16384 - max: 18976888 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 374 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 374 - job_id: j5we3w335 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:30:29Z' - - torchscript_onnx_tflite: - inference_time: 1927.0 - throughput: 518.9413596263622 - estimated_peak_memory_range: - min: 12288 - max: 1740040 + min: 28672 + max: 241120864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jp4ldnk85 
+ job_id: j5wedzjm5 job_status: Passed torchscript_onnx_qnn: inference_time: 1990.0 throughput: 502.51256281407035 estimated_peak_memory_range: - min: 12288 - max: 27901328 + min: 16384 + max: 30130384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgkeqv7wg + job_id: jprv4dz7g job_status: Passed torchscript_onnx: - inference_time: 1857.0 - throughput: 538.5029617662897 + inference_time: 1879.0 + throughput: 532.197977647685 estimated_peak_memory_range: - min: 49152 - max: 240127568 + min: 12288 + max: 18829808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jg9ly0ywg + job_id: jgo21wykp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:30:30Z' + timestamp: '2024-11-09T23:18:52Z' - torchscript_onnx_tflite: - inference_time: 1430.0 - throughput: 699.3006993006993 + inference_time: 1287.0 + throughput: 777.000777000777 estimated_peak_memory_range: min: 16384 - max: 105435360 + max: 105307344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jpxk69n35 + job_id: jg9l3268g job_status: Passed torchscript_onnx_qnn: - inference_time: 1479.0 - throughput: 676.132521974307 + inference_time: 1325.0 + throughput: 754.7169811320755 estimated_peak_memory_range: - min: 626688 - max: 21606624 + min: 618496 + max: 21304112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j5q6r0enp + job_id: jp2k7d2qp job_status: Passed torchscript_onnx: - inference_time: 1434.0 - throughput: 697.350069735007 + inference_time: 1321.0 + throughput: 757.002271006813 
estimated_peak_memory_range: min: 0 - max: 110609216 + max: 110932496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jp14w2w8p + job_id: jpv61m3r5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:30:31Z' + timestamp: '2024-11-09T23:18:53Z' - torchscript_onnx_tflite: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1224.0 + throughput: 816.9934640522875 estimated_peak_memory_range: min: 12288 - max: 27672416 + max: 27322960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jp0z24395 + job_id: jp14d1r7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1262.0 - throughput: 792.3930269413629 + inference_time: 1292.0 + throughput: 773.9938080495356 estimated_peak_memory_range: - min: 614400 - max: 19335888 + min: 0 + max: 18754240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgz3j2jo5 + job_id: jpy1429lp job_status: Passed torchscript_onnx: - inference_time: 1292.0 - throughput: 773.9938080495356 + inference_time: 1297.0 + throughput: 771.0100231303007 estimated_peak_memory_range: min: 0 - max: 32739312 + max: 32446112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j57yl2lv5 + job_id: jgjv0yxeg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:30:33Z' + timestamp: '2024-11-09T23:18:54Z' - torchscript_onnx_tflite: - inference_time: 1930.0 - throughput: 
518.1347150259068 + inference_time: 1921.0 + throughput: 520.5622071837585 estimated_peak_memory_range: min: 20480 - max: 1473520 + max: 1332944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: j5mn6eqdp + job_id: jgdxr4jzp job_status: Passed torchscript_onnx_qnn: inference_time: 1786.0 throughput: 559.9104143337066 estimated_peak_memory_range: min: 634880 - max: 1907360 + max: 2394720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jglv246j5 + job_id: jp0z19nn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:30:20Z' + timestamp: '2024-11-09T23:18:44Z' - torchscript_onnx_tflite: - inference_time: 1928.0 - throughput: 518.6721991701245 + inference_time: 1925.0 + throughput: 519.4805194805194 estimated_peak_memory_range: - min: 24576 - max: 1516520 + min: 28672 + max: 1565072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jgkeqn0og + job_id: j57yjnq95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1800.0 - throughput: 555.5555555555555 + inference_time: 1806.0 + throughput: 553.7098560354374 estimated_peak_memory_range: - min: 634880 - max: 1925088 + min: 151552 + max: 1432840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j56yzjm7p + job_id: jgkel0jng job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:57:16Z' + timestamp: '2024-11-09T23:18:47Z' - torchscript_onnx_tflite: - inference_time: 1926.0 - 
throughput: 519.2107995846313 + inference_time: 1920.0 + throughput: 520.8333333333334 estimated_peak_memory_range: - min: 40960 - max: 28508880 + min: 53248 + max: 1982576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: j5q6rk1mp + job_id: jp4lx4z15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1797.0 - throughput: 556.4830272676684 + inference_time: 1795.0 + throughput: 557.1030640668523 estimated_peak_memory_range: - min: 36864 - max: 1275256 + min: 638976 + max: 1727752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jp3j137zg + job_id: j5q671jop job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:57:17Z' + timestamp: '2024-11-09T23:18:48Z' - torchscript_onnx_tflite: - inference_time: 1918.0 - throughput: 521.376433785193 + inference_time: 1930.0 + throughput: 518.1347150259068 estimated_peak_memory_range: - min: 28672 - max: 220185864 + min: 24576 + max: 2200456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jglv2z8l5 + job_id: jpxk7rwl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1804.0 - throughput: 554.3237250554324 + inference_time: 1802.0 + throughput: 554.9389567147614 estimated_peak_memory_range: - min: 655360 - max: 1999760 + min: 634880 + max: 2272376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgo2n0wdp + job_id: jglv08nm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: 
'2024-10-30T01:57:18Z' + timestamp: '2024-11-09T23:18:49Z' - torchscript_onnx_tflite: - inference_time: 2610.0 - throughput: 383.1417624521073 + inference_time: 3339.0 + throughput: 299.4908655286014 estimated_peak_memory_range: - min: 12288 - max: 107085776 + min: 16384 + max: 27082304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 312 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 312 + job_id: j5mnwkj9p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3289.0 + throughput: 304.04378230465187 + estimated_peak_memory_range: + min: 618496 + max: 6479952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 372 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 372 + job_id: j56y3m6yp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:18:50Z' + - torchscript_onnx_tflite: + inference_time: 2607.0 + throughput: 383.5826620636747 + estimated_peak_memory_range: + min: 20480 + max: 106948640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jpy1jzr8p + job_id: jgn69qjq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2716.0 - throughput: 368.1885125184094 + inference_time: 2695.0 + throughput: 371.0575139146568 estimated_peak_memory_range: min: 618496 - max: 22675584 + max: 22773808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jpedwowo5 + job_id: jp3j47kng job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:30:26Z' + timestamp: '2024-11-09T23:18:51Z' - torchscript_onnx_qnn: - inference_time: 2066.0 - throughput: 484.027105517909 
+ inference_time: 2072.0 + throughput: 482.6254826254826 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j56yz2e6p + job_id: jp8q3rlop job_status: Passed torchscript_onnx: - inference_time: 2043.0 - throughput: 489.47626040137055 + inference_time: 2051.0 + throughput: 487.56704046806436 estimated_peak_memory_range: - min: 17227776 - max: 17227776 + min: 17104896 + max: 17104896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgdxqnqrp + job_id: jpedrx9v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:30:32Z' + timestamp: '2024-11-09T23:18:55Z' diff --git a/qai_hub_models/models/densenet121_quantized/README.md b/qai_hub_models/models/densenet121_quantized/README.md index d0ac37c1..e9abe17b 100644 --- a/qai_hub_models/models/densenet121_quantized/README.md +++ b/qai_hub_models/models/densenet121_quantized/README.md @@ -5,8 +5,7 @@ Densenet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of DenseNet-121-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/densenet121_quantized). 
diff --git a/qai_hub_models/models/densenet121_quantized/perf.yaml b/qai_hub_models/models/densenet121_quantized/perf.yaml index 07f580e1..913dbbb6 100644 --- a/qai_hub_models/models/densenet121_quantized/perf.yaml +++ b/qai_hub_models/models/densenet121_quantized/perf.yaml @@ -20,6 +20,8 @@ aggregated: - Snapdragon X Plus 8-Core CRD - QCS6490 (Proxy) - RB3 Gen 2 (Proxy) + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -36,6 +38,7 @@ aggregated: - Snapdragon® X Elite - Snapdragon® X Plus 8-Core - QCS6490 Proxy + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -46,11 +49,11 @@ models: - name: DenseNet-121-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 1749.0 - throughput: 571.7552887364208 + inference_time: 1752.0 + throughput: 570.7762557077625 estimated_peak_memory_range: - min: 16384 - max: 295900648 + min: 172032 + max: 10445928 primary_compute_unit: NPU precision: int8 layer_info: @@ -58,22 +61,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j56yze16p + job_id: jpv61yv75 job_status: Passed torchscript_onnx: - inference_time: 29070.0 - throughput: 34.39972480220158 + inference_time: 36701.0 + throughput: 27.247213972371323 estimated_peak_memory_range: - min: 7892992 - max: 13108848 + min: 10444800 + max: 628625184 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 325 + layers_on_npu: 379 layers_on_gpu: 0 - layers_on_cpu: 27 - total_layers: 352 - job_id: jp4ldkx85 + layers_on_cpu: 45 + total_layers: 424 + job_id: j5mnw13yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -82,51 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:53:31Z' + timestamp: '2024-11-09T23:50:45Z' - torchscript_onnx_qnn: - inference_time: 1743.0 - throughput: 573.7234652897304 - estimated_peak_memory_range: - min: 53248 - max: 295553360 - primary_compute_unit: NPU - 
precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: jp3j1vm3g - job_status: Passed - torchscript_onnx: - inference_time: 29436.0 - throughput: 33.97200706617747 - estimated_peak_memory_range: - min: 7876608 - max: 13197944 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 325 - layers_on_gpu: 0 - layers_on_cpu: 27 - total_layers: 352 - job_id: jpxk6n735 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:53:32Z' - - torchscript_onnx_qnn: - inference_time: 1217.0 - throughput: 821.6926869350863 + inference_time: 1224.0 + throughput: 816.9934640522875 estimated_peak_memory_range: min: 163840 - max: 24795600 + max: 22848784 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,22 +99,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgo2nkvqp + job_id: jgjv06e7g job_status: Passed torchscript_onnx: - inference_time: 22745.0 - throughput: 43.96570674873598 + inference_time: 27768.0 + throughput: 36.01267646211466 estimated_peak_memory_range: - min: 7827456 - max: 1063946000 + min: 12144640 + max: 1094024976 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 325 + layers_on_npu: 379 layers_on_gpu: 0 - layers_on_cpu: 27 - total_layers: 352 - job_id: j5mn6qwdp + layers_on_cpu: 45 + total_layers: 424 + job_id: jgn69dev5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:53:34Z' + timestamp: '2024-11-09T23:50:46Z' - torchscript_onnx_qnn: - inference_time: 1177.0 - throughput: 849.6176720475786 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: - min: 3530752 - max: 31744752 + min: 0 + max: 27668464 
primary_compute_unit: NPU precision: int8 layer_info: @@ -172,7 +137,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgdxqerrp + job_id: jpedr0k75 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -181,13 +146,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:53:38Z' + timestamp: '2024-11-09T23:50:48Z' - torchscript_onnx_qnn: - inference_time: 6594.0 - throughput: 151.6530178950561 + inference_time: 6702.0 + throughput: 149.20919128618323 estimated_peak_memory_range: - min: 212992 - max: 7996288 + min: 163840 + max: 8449056 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,7 +160,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpv6r01k5 + job_id: jgz3xqrz5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -204,13 +169,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:53:15Z' + timestamp: '2024-11-09T23:50:30Z' - torchscript_onnx_qnn: - inference_time: 1669.0 - throughput: 599.1611743559017 + inference_time: 1670.0 + throughput: 598.8023952095808 estimated_peak_memory_range: min: 180224 - max: 1456288 + max: 1334912 primary_compute_unit: NPU precision: int8 layer_info: @@ -218,7 +183,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgjv2z0vg + job_id: j5wed0qz5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -227,13 +192,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:53:17Z' + timestamp: '2024-11-09T23:50:32Z' - torchscript_onnx_qnn: - inference_time: 1676.0 - throughput: 596.6587112171837 + inference_time: 1668.0 + throughput: 599.5203836930456 estimated_peak_memory_range: - min: 180224 - max: 1501272 + min: 184320 + max: 1844896 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,7 +206,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 215 - job_id: jp14wx28p + job_id: jp14dkekp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -250,13 +215,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:08:34Z' + timestamp: '2024-11-09T23:50:36Z' - torchscript_onnx_qnn: - inference_time: 1676.0 - throughput: 596.6587112171837 + inference_time: 1671.0 + throughput: 598.4440454817475 estimated_peak_memory_range: - min: 180224 - max: 1529104 + min: 184320 + max: 1423200 primary_compute_unit: NPU precision: int8 layer_info: @@ -264,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgdxqlnrp + job_id: jgdxryokp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -273,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:08:36Z' + timestamp: '2024-11-09T23:50:38Z' - torchscript_onnx_qnn: - inference_time: 1670.0 - throughput: 598.8023952095808 + inference_time: 1671.0 + throughput: 598.4440454817475 estimated_peak_memory_range: min: 184320 - max: 1432720 + max: 1379016 primary_compute_unit: NPU precision: int8 layer_info: @@ -287,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp4ld0n85 + job_id: j57yj1xq5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -296,13 +261,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:08:39Z' + timestamp: '2024-11-09T23:50:39Z' - torchscript_onnx_qnn: - inference_time: 2833.0 - throughput: 352.98270384751146 + inference_time: 2844.0 + throughput: 351.6174402250352 estimated_peak_memory_range: - min: 0 - max: 5866272 + min: 139264 + max: 5988448 primary_compute_unit: NPU precision: int8 layer_info: @@ -310,7 +275,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57yl32v5 + job_id: jp4lx6vq5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -319,13 +284,13 
@@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:08:38Z' + timestamp: '2024-11-09T23:50:41Z' - torchscript_onnx_qnn: - inference_time: 2161.0 - throughput: 462.7487274409995 + inference_time: 2124.0 + throughput: 470.8097928436911 estimated_peak_memory_range: - min: 143360 - max: 25494464 + min: 163840 + max: 27766992 primary_compute_unit: NPU precision: int8 layer_info: @@ -333,7 +298,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp14wyd8p + job_id: jpxk78yj5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -342,13 +307,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:53:26Z' + timestamp: '2024-11-09T23:50:43Z' - torchscript_onnx_qnn: - inference_time: 1830.0 - throughput: 546.448087431694 + inference_time: 1864.0 + throughput: 536.480686695279 estimated_peak_memory_range: - min: 405504 - max: 405504 + min: 487424 + max: 487424 primary_compute_unit: NPU precision: int8 layer_info: @@ -356,22 +321,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpedwero5 + job_id: jg9l37wqg job_status: Passed torchscript_onnx: - inference_time: 32571.0 - throughput: 30.70215836173283 + inference_time: 39197.0 + throughput: 25.512156542592546 estimated_peak_memory_range: - min: 48689152 - max: 48689152 + min: 48328704 + max: 48328704 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 325 + layers_on_npu: 379 layers_on_gpu: 0 - layers_on_cpu: 27 - total_layers: 352 - job_id: jgn6ml9k5 + layers_on_cpu: 45 + total_layers: 424 + job_id: jp2k7qmxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -380,4 +345,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:53:36Z' + timestamp: '2024-11-09T23:50:50Z' diff --git a/qai_hub_models/models/detr_resnet101/README.md b/qai_hub_models/models/detr_resnet101/README.md 
index 3ca26748..d56ce509 100644 --- a/qai_hub_models/models/detr_resnet101/README.md +++ b/qai_hub_models/models/detr_resnet101/README.md @@ -5,8 +5,7 @@ DETR is a machine learning model that can detect objects (trained on COCO dataset). -This is based on the implementation of DETR-ResNet101 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet101). diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index 39d6bb34..09f83e20 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index 665eaba7..2cb3c49a 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 15270.0 - throughput: 65.48788474132286 + inference_time: 15217.0 + throughput: 65.71597555365709 estimated_peak_memory_range: - min: 81920 - max: 3008672 - primary_compute_unit: NPU - 
precision: fp16 - layer_info: - layers_on_npu: 856 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 856 - job_id: j57yl22l5 - job_status: Passed - torchscript_onnx: - inference_time: 16071.0 - throughput: 62.223881525729574 - estimated_peak_memory_range: - min: 73728 - max: 133745784 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 886 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 886 - job_id: jgz3j22k5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:29:45Z' - - torchscript_onnx_tflite: - inference_time: 15152.0 - throughput: 65.99788806758184 - estimated_peak_memory_range: - min: 86016 - max: 2782936 + min: 77824 + max: 3149632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jp4ldnnv5 + job_id: jgjv0y9eg job_status: Passed torchscript_onnx: - inference_time: 16031.0 - throughput: 62.37914041544507 + inference_time: 15976.0 + throughput: 62.59389083625438 estimated_peak_memory_range: - min: 36864 - max: 133400056 + min: 40960 + max: 133695456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: j5we3w265 + job_id: jglv08jm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:29:46Z' + timestamp: '2024-11-09T23:18:08Z' - torchscript_onnx_tflite: - inference_time: 12453.0 - throughput: 80.30193527664017 + inference_time: 10866.0 + throughput: 92.03018590097552 estimated_peak_memory_range: - min: 53248 - max: 317581952 + min: 73728 + max: 320158848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 856 - job_id: jpxk69915 + job_id: jpedrxqv5 job_status: Passed torchscript_onnx: - inference_time: 13392.0 - throughput: 74.67144563918758 + inference_time: 11807.0 + throughput: 84.69551960701278 estimated_peak_memory_range: - min: 307200 - max: 283826848 + min: 0 + max: 281524864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jg9ly0jlg + job_id: j56y3mkyp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:29:47Z' + timestamp: '2024-11-09T23:18:10Z' - torchscript_onnx_tflite: - inference_time: 8885.0 - throughput: 112.54924029262803 + inference_time: 10553.0 + throughput: 94.7597839476926 estimated_peak_memory_range: - min: 8192 - max: 120266128 + min: 57344 + max: 121400544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jp0z24465 + job_id: jgz3xy6x5 job_status: Passed torchscript_onnx: - inference_time: 11363.0 - throughput: 88.00492827598346 + inference_time: 11396.0 + throughput: 87.75008775008774 estimated_peak_memory_range: - min: 1331200 - max: 122149968 + min: 1220608 + max: 124027120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jgdxqneep + job_id: jp3j47yng job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:29:49Z' + timestamp: '2024-11-09T23:18:11Z' - torchscript_onnx_tflite: - inference_time: 15085.0 - throughput: 66.29101756711965 + inference_time: 15088.0 + throughput: 66.27783669141039 estimated_peak_memory_range: - min: 65536 - max: 2835752 + min: 
81920 + max: 2330784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j5mn6eewp + job_id: j5wedzkm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:29:26Z' + timestamp: '2024-11-09T23:17:50Z' - torchscript_onnx_tflite: - inference_time: 15123.0 - throughput: 66.12444620776301 + inference_time: 15335.0 + throughput: 65.21030322791 estimated_peak_memory_range: - min: 102400 - max: 3057448 + min: 77824 + max: 3121904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jpxk6jr95 + job_id: jg9l32r8g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:56:51Z' + timestamp: '2024-11-09T23:17:51Z' - torchscript_onnx_tflite: - inference_time: 15250.0 - throughput: 65.57377049180327 + inference_time: 15154.0 + throughput: 65.98917777484492 estimated_peak_memory_range: - min: 77824 - max: 2357344 + min: 61440 + max: 2465808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j5mn62kqp + job_id: jp14d197p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:56:52Z' + timestamp: '2024-11-09T23:17:52Z' - torchscript_onnx_tflite: - inference_time: 15196.0 - throughput: 65.80679126085812 + inference_time: 15232.0 + throughput: 65.65126050420169 estimated_peak_memory_range: - min: 86016 - max: 2383504 + min: 77824 + max: 2357088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jgn6myqm5 + job_id: jgdxr4kzp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:56:53Z' + timestamp: '2024-11-09T23:17:53Z' + - torchscript_onnx_tflite: + inference_time: 23914.0 + throughput: 41.8165091578155 + estimated_peak_memory_range: + min: 81920 + max: 74009360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 856 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 856 + job_id: j57yjnm95 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:17:54Z' - torchscript_onnx_tflite: - inference_time: 21002.0 - throughput: 47.614512903533 + inference_time: 21134.0 + throughput: 47.31711933377496 estimated_peak_memory_range: - min: 90112 - max: 256165632 + min: 53248 + max: 255948752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jpy1jzz7p + job_id: jp4lx4715 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:29:30Z' + timestamp: '2024-11-09T23:17:55Z' - torchscript_onnx: - inference_time: 17404.0 - throughput: 57.45805561939784 + inference_time: 17418.0 + throughput: 57.41187277528993 estimated_peak_memory_range: - min: 121651200 - max: 121651200 + min: 121540608 + max: 121540608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jp14w2y2p + job_id: jgo21wjkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm 
chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:29:48Z' + timestamp: '2024-11-09T23:18:12Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/README.md b/qai_hub_models/models/detr_resnet101_dc5/README.md index 17aabd5f..01764933 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/README.md +++ b/qai_hub_models/models/detr_resnet101_dc5/README.md @@ -5,8 +5,7 @@ DETR is a machine learning model that can detect objects (trained on COCO dataset). -This is based on the implementation of DETR-ResNet101-DC5 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet101_dc5). diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index 826f7d67..b17fe923 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 4066b5ee..e1ffb286 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: 
DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 82128.0 - throughput: 12.176115332164427 + inference_time: 82308.0 + throughput: 12.149487291636293 estimated_peak_memory_range: - min: 188416 - max: 2557192 + min: 200704 + max: 2360936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jg9ly0llg + job_id: jpedrx0o5 job_status: Passed torchscript_onnx: - inference_time: 104060.0 - throughput: 9.609840476648088 + inference_time: 91771.0 + throughput: 10.896688496365954 estimated_peak_memory_range: - min: 159744 - max: 133883528 + min: 126976 + max: 133485072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jpv6rq6j5 + job_id: jp8q3rdop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:28:51Z' + timestamp: '2024-11-09T23:17:13Z' - torchscript_onnx_tflite: - inference_time: 81005.0 - throughput: 12.344916980433307 + inference_time: 64494.0 + throughput: 15.505318324185195 estimated_peak_memory_range: - min: 20480 - max: 3036584 + min: 118784 + max: 575225664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jp14w242p + job_id: jgz3xyqo5 job_status: Passed torchscript_onnx: - inference_time: 103970.0 - throughput: 9.618159084351255 + inference_time: 78674.0 + throughput: 12.710679512926761 estimated_peak_memory_range: - min: 159744 - max: 134067496 + min: 2998272 + max: 591446032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,30 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jgjv2dvxg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - 
form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:28:52Z' - - torchscript_onnx_tflite: - inference_time: 66992.0 - throughput: 14.927155481251493 - estimated_peak_memory_range: - min: 172032 - max: 574449360 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 857 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 857 - job_id: jgdxqnxep + job_id: jgkel0wng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:28:53Z' + timestamp: '2024-11-09T23:17:14Z' - torchscript_onnx_tflite: - inference_time: 60647.0 - throughput: 16.48886177387175 + inference_time: 61156.0 + throughput: 16.351625351559946 estimated_peak_memory_range: - min: 65536 - max: 292270736 + min: 167936 + max: 293182080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jprv26v9g + job_id: j5wedz035 job_status: Passed torchscript_onnx: - inference_time: 65653.0 - throughput: 15.23159642362116 + inference_time: 68166.0 + throughput: 14.670070122935188 estimated_peak_memory_range: - min: 2961408 - max: 340373840 + min: 696320 + max: 337343792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: j5we3ww65 + job_id: j5q671xop job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -176,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:28:55Z' + timestamp: '2024-11-09T23:17:15Z' - torchscript_onnx_tflite: - inference_time: 81191.0 - throughput: 12.316636080353733 + inference_time: 81845.0 + throughput: 12.218217362086872 estimated_peak_memory_range: - min: 16384 - max: 3568968 + min: 90112 + max: 2751728 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -190,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: j57yl2yl5 + job_id: jg9l327wg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -199,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:28:33Z' + timestamp: '2024-11-09T23:16:55Z' - torchscript_onnx_tflite: - inference_time: 89213.0 - throughput: 11.209128714425027 + inference_time: 81266.0 + throughput: 12.305269116235571 estimated_peak_memory_range: - min: 167936 - max: 3793120 + min: 159744 + max: 2506856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jpedw1605 + job_id: jp14d1k8p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:56:17Z' + timestamp: '2024-11-09T23:16:56Z' - torchscript_onnx_tflite: - inference_time: 90809.0 - throughput: 11.012124348908149 + inference_time: 81245.0 + throughput: 12.308449750753892 estimated_peak_memory_range: - min: 303104 - max: 2455528 + min: 12288 + max: 2874504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgz3j9z65 + job_id: jgdxr4yrp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -245,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:56:18Z' + timestamp: '2024-11-09T23:16:57Z' - torchscript_onnx_tflite: - inference_time: 91098.0 - throughput: 10.977189400425916 + inference_time: 82208.0 + throughput: 12.164266251459711 estimated_peak_memory_range: - min: 167936 - max: 2798688 + min: 159744 + max: 3526624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -259,7 +238,7 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 857 - job_id: j5we3vyj5 + job_id: j5wedz0m5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -268,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:56:19Z' + timestamp: '2024-11-09T23:16:58Z' + - torchscript_onnx_tflite: + inference_time: 101051.0 + throughput: 9.895993112388794 + estimated_peak_memory_range: + min: 172032 + max: 243425728 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 857 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 857 + job_id: jg9l3278g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:16:59Z' - torchscript_onnx_tflite: - inference_time: 95997.0 - throughput: 10.416992197672844 + inference_time: 95907.0 + throughput: 10.426767597777014 estimated_peak_memory_range: - min: 282624 - max: 514532624 + min: 53248 + max: 508153984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -282,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgn6m06r5 + job_id: jp14d1k7p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -291,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:28:37Z' + timestamp: '2024-11-09T23:17:00Z' - torchscript_onnx: - inference_time: 70068.0 - throughput: 14.271850202660273 + inference_time: 70091.0 + throughput: 14.267166968655035 estimated_peak_memory_range: - min: 125362176 - max: 125362176 + min: 125292544 + max: 125292544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -305,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jgz3j23k5 + job_id: jglv08dm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -314,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm 
chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:28:54Z' + timestamp: '2024-11-09T23:17:16Z' diff --git a/qai_hub_models/models/detr_resnet50/README.md b/qai_hub_models/models/detr_resnet50/README.md index 5086b09b..37c8d678 100644 --- a/qai_hub_models/models/detr_resnet50/README.md +++ b/qai_hub_models/models/detr_resnet50/README.md @@ -5,8 +5,7 @@ DETR is a machine learning model that can detect objects (trained on COCO dataset). -This is based on the implementation of DETR-ResNet50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet50). diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index c46ac3ce..68de8c14 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index 1bb407f3..d1c19c60 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - 
inference_time: 10838.0 - throughput: 92.26794611551946 + inference_time: 11032.0 + throughput: 90.64539521392314 estimated_peak_memory_range: - min: 61440 - max: 2696960 + min: 53248 + max: 2512208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jp4lvx225 + job_id: jgo21wwqp job_status: Passed torchscript_onnx: - inference_time: 12276.0 - throughput: 81.45975887911372 + inference_time: 12244.0 + throughput: 81.67265599477295 estimated_peak_memory_range: - min: 36864 - max: 101116008 + min: 12288 + max: 101379336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jglv30925 + job_id: jgkel0kwg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:47:01Z' + timestamp: '2024-11-09T23:16:17Z' - torchscript_onnx_tflite: - inference_time: 10965.0 - throughput: 91.19927040583676 + inference_time: 7656.0 + throughput: 130.61650992685475 estimated_peak_memory_range: - min: 57344 - max: 2636808 + min: 126976 + max: 258259136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,37 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jgz3j3kk5 + job_id: jpv61mnk5 job_status: Passed torchscript_onnx: - inference_time: 12300.0 - throughput: 81.30081300813008 - estimated_peak_memory_range: - min: 57344 - max: 1271266320 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 767 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 767 - job_id: jp3j1n0lg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:27:59Z' - - torchscript_onnx: - inference_time: 
9772.0 - throughput: 102.33319688907082 + inference_time: 8821.0 + throughput: 113.36583153837434 estimated_peak_memory_range: - min: 2789376 - max: 226073296 + min: 2891776 + max: 225366000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jgo2nz6xp + job_id: j5q671dnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:28:00Z' + timestamp: '2024-11-09T23:16:18Z' - torchscript_onnx_tflite: - inference_time: 7238.0 - throughput: 138.15971262779775 + inference_time: 7217.0 + throughput: 138.56172925038103 estimated_peak_memory_range: - min: 53248 - max: 93299840 + min: 57344 + max: 93010912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jpxk69o15 + job_id: jgjv0y8vg job_status: Passed torchscript_onnx: - inference_time: 8541.0 - throughput: 117.08230886313078 + inference_time: 8470.0 + throughput: 118.06375442739079 estimated_peak_memory_range: - min: 2895872 - max: 95817792 + min: 2863104 + max: 93916576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jgjv2dnxg + job_id: jglv08qj5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -176,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:28:02Z' + timestamp: '2024-11-09T23:16:19Z' - torchscript_onnx_tflite: - inference_time: 10817.0 - throughput: 92.44707405010631 + inference_time: 10807.0 + throughput: 92.53261774775608 estimated_peak_memory_range: - min: 61440 - max: 3072360 + min: 94208 + max: 2501920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -190,7 +169,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 788 - job_id: jg9ly0nlg + job_id: jpedrxno5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -199,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:27:40Z' + timestamp: '2024-11-09T23:15:59Z' - torchscript_onnx_tflite: - inference_time: 10866.0 - throughput: 92.03018590097552 + inference_time: 10957.0 + throughput: 91.26585744273068 estimated_peak_memory_range: - min: 53248 - max: 2316160 + min: 16384 + max: 2120840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jp8qm9j8p + job_id: jgz3xy0o5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:55:43Z' + timestamp: '2024-11-09T23:16:00Z' - torchscript_onnx_tflite: - inference_time: 10848.0 - throughput: 92.18289085545723 + inference_time: 10897.0 + throughput: 91.76837661741764 estimated_peak_memory_range: - min: 57344 - max: 2487304 + min: 94208 + max: 2356280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jgkeqn6og + job_id: j5wedzr35 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -245,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:55:44Z' + timestamp: '2024-11-09T23:16:01Z' - torchscript_onnx_tflite: - inference_time: 10841.0 - throughput: 92.2424130615257 + inference_time: 10941.0 + throughput: 91.39932364500503 estimated_peak_memory_range: - min: 69632 - max: 2511736 + min: 61440 + max: 2197880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -259,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: j5q6rk4mp + job_id: jg9l32qwg job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -268,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:55:45Z' + timestamp: '2024-11-09T23:16:02Z' + - torchscript_onnx_tflite: + inference_time: 16440.0 + throughput: 60.82725060827251 + estimated_peak_memory_range: + min: 81920 + max: 66972304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 788 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 788 + job_id: jp14d1m8p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:16:03Z' - torchscript_onnx_tflite: - inference_time: 14456.0 - throughput: 69.17542888765911 + inference_time: 14430.0 + throughput: 69.3000693000693 estimated_peak_memory_range: - min: 90112 - max: 222263408 + min: 0 + max: 222463072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -282,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jp4ldnrv5 + job_id: jgdxr4mrp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -291,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:27:44Z' + timestamp: '2024-11-09T23:16:04Z' - torchscript_onnx: - inference_time: 13406.0 - throughput: 74.59346561241236 + inference_time: 13375.0 + throughput: 74.76635514018692 estimated_peak_memory_range: - min: 83070976 - max: 83070976 + min: 84058112 + max: 84058112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -305,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jpv6rqkj5 + job_id: j56y3m06p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -314,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:28:01Z' + timestamp: '2024-11-09T23:16:20Z' diff --git 
a/qai_hub_models/models/detr_resnet50_dc5/README.md b/qai_hub_models/models/detr_resnet50_dc5/README.md index 7c4b569f..f10d45e8 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/README.md +++ b/qai_hub_models/models/detr_resnet50_dc5/README.md @@ -5,8 +5,7 @@ DETR is a machine learning model that can detect objects (trained on COCO dataset). -This is based on the implementation of DETR-ResNet50-DC5 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/detr_resnet50_dc5). diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index 4aa0c571..f8ccd99b 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index 0ba0ea52..758a9216 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 75450.0 - throughput: 13.253810470510272 + 
inference_time: 74270.0 + throughput: 13.464386697185944 estimated_peak_memory_range: - min: 159744 - max: 3284936 + min: 57344 + max: 2581160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,52 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jgz3j3965 + job_id: jglv08wj5 job_status: Passed torchscript_onnx: - inference_time: 90894.0 - throughput: 11.001826303166325 + inference_time: 89233.0 + throughput: 11.20661638631448 estimated_peak_memory_range: - min: 147456 - max: 100392984 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 767 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 767 - job_id: jgkeqem2g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:27:09Z' - - torchscript_onnx_tflite: - inference_time: 75455.0 - throughput: 13.25293221125174 - estimated_peak_memory_range: - min: 163840 - max: 2309744 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 789 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 789 - job_id: j5we3evj5 - job_status: Passed - torchscript_onnx: - inference_time: 92197.0 - throughput: 10.846339902599867 - estimated_peak_memory_range: - min: 167936 - max: 99910624 + min: 151552 + max: 99985328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: j5q6r6o4p + job_id: jpy14228p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:27:10Z' + timestamp: '2024-11-09T23:15:27Z' - torchscript_onnx_tflite: - inference_time: 61365.0 - throughput: 16.295934164425976 + inference_time: 59252.0 + throughput: 16.877067440761493 estimated_peak_memory_range: - min: 163840 
- max: 516875536 + min: 147456 + max: 516756784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jg9lyl1vg + job_id: j56y3mo6p job_status: Passed torchscript_onnx: - inference_time: 76649.0 - throughput: 13.04648462471787 + inference_time: 76827.0 + throughput: 13.016257305374413 estimated_peak_memory_range: - min: 0 - max: 527613392 + min: 3018752 + max: 530579872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jglv2vr85 + job_id: jp0z19995 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:27:11Z' + timestamp: '2024-11-09T23:15:28Z' - torchscript_onnx_tflite: - inference_time: 55924.0 - throughput: 17.88141048565911 + inference_time: 55125.0 + throughput: 18.140589569160998 estimated_peak_memory_range: - min: 106496 - max: 265382464 + min: 98304 + max: 265656496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jgdxqxlep + job_id: jp3j47o3g job_status: Passed torchscript_onnx: - inference_time: 62083.0 - throughput: 16.10746903339078 + inference_time: 62899.0 + throughput: 15.898503950778231 estimated_peak_memory_range: min: 0 - max: 306799312 + max: 305829760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jp3j1j2lg + job_id: jp8q3rrkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:27:13Z' + timestamp: '2024-11-09T23:15:29Z' - torchscript_onnx_tflite: - inference_time: 74357.0 - throughput: 
13.448632946460993 + inference_time: 74483.0 + throughput: 13.4258824161218 estimated_peak_memory_range: - min: 98304 - max: 4181232 + min: 147456 + max: 3060720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp14w4llp + job_id: jgo21wdqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:26:51Z' + timestamp: '2024-11-09T23:15:09Z' - torchscript_onnx_tflite: - inference_time: 81671.0 - throughput: 12.24424826437781 + inference_time: 83592.0 + throughput: 11.962867260024883 estimated_peak_memory_range: - min: 167936 - max: 2735672 + min: 16384 + max: 2086688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: j57ylwor5 + job_id: jpv61mmk5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:55:14Z' + timestamp: '2024-11-09T23:15:10Z' - torchscript_onnx_tflite: - inference_time: 74150.0 - throughput: 13.486176668914363 + inference_time: 82707.0 + throughput: 12.090875016624953 estimated_peak_memory_range: - min: 167936 - max: 2301928 + min: 12288 + max: 2120784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp4ldoel5 + job_id: jgjv0yyvg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:55:14Z' + timestamp: '2024-11-09T23:15:11Z' - torchscript_onnx_tflite: - inference_time: 83335.0 - throughput: 11.999760004799905 + inference_time: 75931.0 + throughput: 13.169851575772741 
estimated_peak_memory_range: - min: 20480 - max: 2192216 + min: 184320 + max: 2466392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jpxk6j095 + job_id: jpedrxxo5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:55:15Z' + timestamp: '2024-11-09T23:15:12Z' - torchscript_onnx_tflite: - inference_time: 84513.0 - throughput: 11.832499142143812 + inference_time: 92792.0 + throughput: 10.776791102681265 estimated_peak_memory_range: - min: 16384 - max: 474448192 + min: 204800 + max: 235149536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 789 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 789 + job_id: jgz3xyyo5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:15:13Z' + - torchscript_onnx_tflite: + inference_time: 91722.0 + throughput: 10.902509757746234 + estimated_peak_memory_range: + min: 499712 + max: 476882416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp14w4x2p + job_id: j5wedzz35 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:26:55Z' + timestamp: '2024-11-09T23:15:14Z' - torchscript_onnx: - inference_time: 65318.0 - throughput: 15.309715545485165 + inference_time: 65268.0 + throughput: 15.321443892872464 estimated_peak_memory_range: - min: 86691840 - max: 86691840 + min: 87830528 + max: 87830528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - 
job_id: j56yzyl0p + job_id: jgkel00wg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:27:12Z' + timestamp: '2024-11-09T23:15:30Z' diff --git a/qai_hub_models/models/efficientnet_b0/README.md b/qai_hub_models/models/efficientnet_b0/README.md index 6a3dac7b..ef0e88ef 100644 --- a/qai_hub_models/models/efficientnet_b0/README.md +++ b/qai_hub_models/models/efficientnet_b0/README.md @@ -5,8 +5,7 @@ EfficientNetB0 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of EfficientNet-B0 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/efficientnet_b0). 
diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index e594ff0d..2eb72d5e 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1608.0 - throughput: 621.8905472636816 + inference_time: 1607.0 + throughput: 622.2775357809583 estimated_peak_memory_range: min: 28672 - max: 17973360 + max: 295338512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j56yzy67p + job_id: jpy142o8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1665.0 - throughput: 600.6006006006006 + inference_time: 1666.0 + throughput: 600.2400960384153 estimated_peak_memory_range: - min: 16384 - max: 84515432 + min: 20480 + max: 225545008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j57ylywr5 + job_id: jpv61m2k5 job_status: Passed torchscript_onnx: - inference_time: 1581.0 - throughput: 632.5110689437065 + inference_time: 1594.0 + throughput: 627.3525721455458 estimated_peak_memory_range: - min: 12288 - max: 15623616 + min: 274432 + max: 17853944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgkeqenog + job_id: j5mnwk9dp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:26:18Z' + 
timestamp: '2024-11-09T23:14:35Z' - torchscript_onnx_tflite: - inference_time: 1602.0 - throughput: 624.2197253433209 + inference_time: 1124.0 + throughput: 889.6797153024911 estimated_peak_memory_range: - min: 36864 - max: 2465648 + min: 16384 + max: 79706944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgo2n2ydp + job_id: jp0z19m95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1679.0 - throughput: 595.5926146515783 + inference_time: 1172.0 + throughput: 853.2423208191126 estimated_peak_memory_range: - min: 16384 - max: 85301712 + min: 0 + max: 21454928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp4ldlol5 + job_id: jgjv0y3vg job_status: Passed torchscript_onnx: - inference_time: 1603.0 - throughput: 623.8303181534623 + inference_time: 1141.0 + throughput: 876.4241893076249 estimated_peak_memory_range: - min: 12288 - max: 15677888 + min: 0 + max: 85820208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5q6r6kmp + job_id: jgn69q1k5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:26:19Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:14:36Z' - torchscript_onnx_tflite: - inference_time: 1162.0 - throughput: 860.5851979345955 + inference_time: 1124.0 + throughput: 889.6797153024911 estimated_peak_memory_range: - min: 16384 - max: 79944176 + min: 12288 + max: 33110704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpv6r63m5 + job_id: jp8q3rekp job_status: Passed 
torchscript_onnx_qnn: - inference_time: 1213.0 - throughput: 824.4023083264633 + inference_time: 953.0 + throughput: 1049.3179433368311 estimated_peak_memory_range: - min: 618496 - max: 16997616 + min: 0 + max: 16050384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jpxk6kj95 + job_id: jpedrx6o5 job_status: Passed torchscript_onnx: - inference_time: 1197.0 - throughput: 835.421888053467 + inference_time: 1178.0 + throughput: 848.8964346349745 estimated_peak_memory_range: min: 0 - max: 85757376 + max: 35680304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jglv2vzl5 + job_id: jprv4dx0g job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:26:20Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:14:37Z' - torchscript_onnx_tflite: - inference_time: 1136.0 - throughput: 880.2816901408451 + inference_time: 1599.0 + throughput: 625.3908692933084 estimated_peak_memory_range: - min: 12288 - max: 33229792 + min: 32768 + max: 257884864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgdxqxwlp + job_id: jgkel02wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1173.0 - throughput: 852.5149190110827 + inference_time: 1563.0 + throughput: 639.7952655150352 estimated_peak_memory_range: - min: 0 - max: 15256432 + min: 630784 + max: 1914680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp8qmq98p - job_status: Passed - torchscript_onnx: - inference_time: 961.0 - 
throughput: 1040.5827263267429 - estimated_peak_memory_range: - min: 0 - max: 36702064 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 245 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 245 - job_id: jp3j1j3zg + job_id: jgz3xyzo5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:26:23Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:14:28Z' - torchscript_onnx_tflite: - inference_time: 1593.0 - throughput: 627.7463904582548 + inference_time: 1604.0 + throughput: 623.4413965087282 estimated_peak_memory_range: - min: 16384 - max: 2868696 + min: 20480 + max: 1546912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgjv2vx8g + job_id: j5q671lnp job_status: Passed torchscript_onnx_qnn: - inference_time: 1560.0 - throughput: 641.025641025641 + inference_time: 1579.0 + throughput: 633.3122229259025 estimated_peak_memory_range: - min: 634880 - max: 1827464 + min: 626688 + max: 2331440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j5mn6n2qp + job_id: jg9l32owg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:26:10Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:14:30Z' - torchscript_onnx_tflite: - inference_time: 1603.0 - throughput: 623.8303181534623 + inference_time: 1605.0 + throughput: 623.0529595015577 estimated_peak_memory_range: - min: 28672 - max: 1629784 + min: 16384 + max: 2622896 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpedw1775 + job_id: jglv08yj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1576.0 - throughput: 634.5177664974619 + inference_time: 1575.0 + throughput: 634.9206349206349 estimated_peak_memory_range: - min: 643072 - max: 2050728 + min: 634880 + max: 1867448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jg9ly1zqg + job_id: jp14d1o8p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:54:46Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:14:31Z' - torchscript_onnx_tflite: - inference_time: 1602.0 - throughput: 624.2197253433209 + inference_time: 1607.0 + throughput: 622.2775357809583 estimated_peak_memory_range: - min: 20480 - max: 2896536 + min: 24576 + max: 1471336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgz3j9lz5 + job_id: j56y3m86p job_status: Passed torchscript_onnx_qnn: - inference_time: 1569.0 - throughput: 637.3486297004462 + inference_time: 1572.0 + throughput: 636.1323155216285 estimated_peak_memory_range: - min: 32768 - max: 1625128 + min: 634880 + max: 2011712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp14wlnkp + job_id: jgdxr46rp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:54:47Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:14:32Z' - torchscript_onnx_tflite: - inference_time: 1602.0 - throughput: 
624.2197253433209 + inference_time: 3770.0 + throughput: 265.2519893899204 estimated_peak_memory_range: - min: 45056 - max: 1718552 + min: 16384 + max: 40261200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5we3vlz5 + job_id: jp3j47z3g job_status: Passed torchscript_onnx_qnn: - inference_time: 1562.0 - throughput: 640.2048655569782 + inference_time: 3772.0 + throughput: 265.11134676564154 estimated_peak_memory_range: - min: 643072 - max: 1928288 + min: 0 + max: 5895840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jgdxq9dkp + job_id: j57yjnov5 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:54:48Z' + chipset: SA8295P + timestamp: '2024-11-09T23:14:33Z' - torchscript_onnx_tflite: - inference_time: 3071.0 - throughput: 325.626831650928 + inference_time: 3089.0 + throughput: 323.72936225315635 estimated_peak_memory_range: - min: 24576 - max: 88694736 + min: 20480 + max: 88412656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp14w40lp + job_id: jgo21wlqp job_status: Passed torchscript_onnx_qnn: - inference_time: 3167.0 - throughput: 315.75623618566465 + inference_time: 3168.0 + throughput: 315.65656565656565 estimated_peak_memory_range: min: 618496 - max: 25208336 + max: 26833456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp0z2zqe5 + job_id: jpxk7r035 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - 
timestamp: '2024-10-27T00:26:16Z' + timestamp: '2024-11-09T23:14:34Z' - torchscript_onnx_qnn: - inference_time: 1756.0 - throughput: 569.4760820045558 + inference_time: 1754.0 + throughput: 570.1254275940707 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jgn6m6ym5 + job_id: j5wedzy35 job_status: Passed torchscript_onnx: - inference_time: 1709.0 - throughput: 585.1375073142189 + inference_time: 1694.0 + throughput: 590.318772136954 estimated_peak_memory_range: - min: 14721024 - max: 14721024 + min: 14614528 + max: 14614528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j56yzyj7p + job_id: jp2k7dorp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:26:21Z' + timestamp: '2024-11-09T23:14:39Z' diff --git a/qai_hub_models/models/efficientnet_b4/README.md b/qai_hub_models/models/efficientnet_b4/README.md new file mode 100644 index 00000000..a3a1fb37 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/README.md @@ -0,0 +1,58 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [EfficientNet-B4: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/efficientnet_b4) + +EfficientNetB4 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +{source_repo_details}This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/efficientnet_b4). 
+ +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.efficientnet_b4.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.efficientnet_b4.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + + +## License +* The license for the original implementation of EfficientNet-B4 can be found + [here](https://github.com/pytorch/vision/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + + +## References +* [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) +* [Source Model Implementation](https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). 
+ + diff --git a/qai_hub_models/models/efficientnet_b4/__init__.py b/qai_hub_models/models/efficientnet_b4/__init__.py new file mode 100644 index 00000000..d982ebcc --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.app import ( # noqa: F401 + ImagenetClassifierApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import EfficientNetB4 as Model # noqa: F401 diff --git a/qai_hub_models/models/efficientnet_b4/conftest.py b/qai_hub_models/models/efficientnet_b4/conftest.py new file mode 100644 index 00000000..1d2ac353 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.efficientnet_b4 import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/efficientnet_b4/demo.py b/qai_hub_models/models/efficientnet_b4/demo.py new file mode 100644 index 00000000..9891e612 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.efficientnet_b4.model import MODEL_ID, EfficientNetB4 + + +def main(is_test: bool = False): + imagenet_demo(EfficientNetB4, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/facemap_3dmm/evaluate.py b/qai_hub_models/models/efficientnet_b4/evaluate.py similarity index 95% rename from qai_hub_models/models/facemap_3dmm/evaluate.py rename to qai_hub_models/models/efficientnet_b4/evaluate.py index 5de3ff97..29978c96 100644 --- a/qai_hub_models/models/facemap_3dmm/evaluate.py +++ b/qai_hub_models/models/efficientnet_b4/evaluate.py @@ -11,7 +11,7 @@ import qai_hub as hub -from qai_hub_models.models.facial_lmk import MODEL_ID, Model +from qai_hub_models.models.efficientnet_b4 import MODEL_ID, Model from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs from qai_hub_models.utils.evaluate import evaluate_on_dataset from qai_hub_models.utils.inference import compile_model_from_args diff --git a/qai_hub_models/models/efficientnet_b4/export.py b/qai_hub_models/models/efficientnet_b4/export.py new file mode 100644 index 00000000..044e1f12 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/export.py @@ -0,0 +1,214 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Optional, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.common import ExportResult, TargetRuntime +from qai_hub_models.models.efficientnet_b4 import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + + +def export_model( + device: Optional[str] = None, + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> ExportResult | list[str]: + """ + This function executes the following recipe: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format + 2. Compiles the model to an asset that can be run on device + 3. Profiles the model performance on a real device + 4. Inferences the model on sample inputs + 5. Downloads the model asset to the local directory + 6. Summarizes the results from profiling and inference + + Each of the last 4 steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A struct of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + """ + model_name = "efficientnet_b4" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not device and not chipset: + raise ValueError("Device or Chipset must be provided.") + hub_device = hub.Device( + name=device or "", attributes=f"chipset:{chipset}" if chipset else None + ) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "efficientnet_b4", + "EfficientNet-B4", + device or f"Device (Chipset {chipset})", + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # On-device perf improves with I/O in channel_last format except when using ONNX. + use_channel_last_format = target_runtime != TargetRuntime.ONNX + + # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + # 2. Compiles the model to an asset that can be run on device + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profiles the model performance on a real device + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Inferences the model on sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs( + input_spec, use_channel_last_format=use_channel_last_format + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=sample_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Downloads the model asset to the local directory + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / model_name)) + + # 6. Summarizes the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + sample_inputs = model.sample_inputs(use_channel_last_format=False) + torch_out = torch_inference( + model, sample_inputs, return_channel_last_output=use_channel_last_format + ) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + + print_inference_metrics( + inference_job, inference_result, torch_out, model.get_output_names() + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device) + + return ExportResult( + compile_job=compile_job, + inference_job=inference_job, + profile_job=profile_job, + ) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/efficientnet_b4/info.yaml b/qai_hub_models/models/efficientnet_b4/info.yaml new file mode 100644 index 
00000000..40626633 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/info.yaml @@ -0,0 +1,46 @@ +name: EfficientNet-B4 +# id must match with the model dir name in qai_hub_models +id: efficientnet_b4 +status: public +private_perf_form_factors: + - auto +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +description: EfficientNetB4 is a machine learning model that can classify images from + the Imagenet dataset. It can also be used as a backbone in building more complex + models for specific use cases. +use_case: Image Classification +tags: + - backbone +research_paper: https://arxiv.org/abs/1905.11946 +research_paper_title: 'EfficientNet: Rethinking Model Scaling for Convolutional Neural + Networks' +license: https://github.com/pytorch/vision/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py +technical_details: + Model checkpoint: Imagenet + Input resolution: 380x380 + Number of parameters: 19.34M + Model size: 74.5 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - efficientnet_b0 + - mobilenet_v2 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: true +has_animated_banner: true +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k + - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/efficientnet_b4/model.py b/qai_hub_models/models/efficientnet_b4/model.py new file mode 100644 index 00000000..c7458aa7 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/model.py @@ -0,0 +1,20 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import torchvision.models as tv_models + +from qai_hub_models.models._shared.imagenet_classifier.model import ImagenetClassifier + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_WEIGHTS = "IMAGENET1K_V1" + + +class EfficientNetB4(ImagenetClassifier): + @classmethod + def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> EfficientNetB4: + net = tv_models.efficientnet_b4(weights=weights) + return cls(net) diff --git a/qai_hub_models/models/efficientnet_b4/perf.yaml b/qai_hub_models/models/efficientnet_b4/perf.yaml new file mode 100644 index 00000000..7c0fe884 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b4/perf.yaml @@ -0,0 +1,317 @@ +aggregated: + supported_devices: + - Snapdragon 8 Elite QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + - SA8775 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - QCS8450 Proxy + - QCS8550 Proxy + - SA8255P Proxy + - SA8295P + - SA8650P Proxy + - SA8775P Proxy +models: +- name: EfficientNet-B4 + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 3624.0 + throughput: 275.9381898454746 + estimated_peak_memory_range: + min: 16384 + max: 3167680 + primary_compute_unit: NPU + precision: 
fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp2k7dw4p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3726.0 + throughput: 268.3843263553409 + estimated_peak_memory_range: + min: 28672 + max: 240842880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jgo21wrxp + job_status: Passed + torchscript_onnx: + inference_time: 3571.0 + throughput: 280.0336040324839 + estimated_peak_memory_range: + min: 12288 + max: 51971632 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp14d1n8p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-11-09T23:13:52Z' + - torchscript_onnx_tflite: + inference_time: 2629.0 + throughput: 380.3727653100038 + estimated_peak_memory_range: + min: 0 + max: 167106752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jpy142x7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2694.0 + throughput: 371.19524870081665 + estimated_peak_memory_range: + min: 0 + max: 28486160 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jpv61mdj5 + job_status: Passed + torchscript_onnx: + inference_time: 2589.0 + throughput: 386.24951718810354 + estimated_peak_memory_range: + min: 344064 + max: 171876288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jgdxr4drp + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + 
form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:13:53Z' + - torchscript_onnx_tflite: + inference_time: 2106.0 + throughput: 474.8338081671415 + estimated_peak_memory_range: + min: 12288 + max: 65791248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp0z19j65 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2548.0 + throughput: 392.4646781789639 + estimated_peak_memory_range: + min: 0 + max: 26139056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jgjv0y7xg + job_status: Passed + torchscript_onnx: + inference_time: 2505.0 + throughput: 399.2015968063872 + estimated_peak_memory_range: + min: 0 + max: 71471536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: j57yjnev5 + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:13:54Z' + - torchscript_onnx_tflite: + inference_time: 3610.0 + throughput: 277.0083102493075 + estimated_peak_memory_range: + min: 16384 + max: 2220376 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp8q3rxxp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3321.0 + throughput: 301.11412225233363 + estimated_peak_memory_range: + min: 634880 + max: 1747072 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jpedrxz15 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + 
form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:13:44Z' + - torchscript_onnx_tflite: + inference_time: 7289.0 + throughput: 137.1930305940458 + estimated_peak_memory_range: + min: 0 + max: 182094864 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp3j479lg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7403.0 + throughput: 135.080372821829 + estimated_peak_memory_range: + min: 0 + max: 35397920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jg9l32zwg + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-11-09T23:13:51Z' + - torchscript_onnx_qnn: + inference_time: 3659.0 + throughput: 273.2987154960372 + estimated_peak_memory_range: + min: 602112 + max: 602112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 480 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 480 + job_id: jgz3xymk5 + job_status: Passed + torchscript_onnx: + inference_time: 3728.0 + throughput: 268.2403433476395 + estimated_peak_memory_range: + min: 48967680 + max: 48967680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 482 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 482 + job_id: jp4lx4y85 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-11-09T23:13:56Z' diff --git a/qai_hub_models/models/efficientnet_b4/test.py b/qai_hub_models/models/efficientnet_b4/test.py new file mode 100644 index 00000000..b4e463b6 --- /dev/null +++ 
b/qai_hub_models/models/efficientnet_b4/test.py @@ -0,0 +1,32 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import pytest + +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, + run_imagenet_classifier_trace_test, +) +from qai_hub_models.models.efficientnet_b4.demo import main as demo_main +from qai_hub_models.models.efficientnet_b4.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + EfficientNetB4, +) + + +def test_task(): + run_imagenet_classifier_test( + EfficientNetB4.from_pretrained(), MODEL_ID, asset_version=MODEL_ASSET_VERSION + ) + + +@pytest.mark.trace +def test_trace(): + run_imagenet_classifier_trace_test(EfficientNetB4.from_pretrained()) + + +def test_demo(): + # Verify demo does not crash + demo_main(is_test=True) diff --git a/qai_hub_models/models/efficientvit_b2_cls/README.md b/qai_hub_models/models/efficientvit_b2_cls/README.md new file mode 100644 index 00000000..b160c25f --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/README.md @@ -0,0 +1,63 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [EfficientViT-b2-cls: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/efficientvit_b2_cls) + +EfficientViT is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/efficientvit_b2_cls).
+ +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[efficientvit_b2_cls]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.efficientvit_b2_cls.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.efficientvit_b2_cls.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. + + +## License +* The license for the original implementation of EfficientViT-b2-cls can be found + [here](https://github.com/CVHub520/efficientvit/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + + +## References +* [EfficientViT: Multi-Scale Linear Attention for High-Resolution Dense Prediction](https://arxiv.org/abs/2205.14756) +* [Source Model Implementation](https://github.com/CVHub520/efficientvit) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com).
+ + diff --git a/qai_hub_models/models/efficientvit_b2_cls/__init__.py b/qai_hub_models/models/efficientvit_b2_cls/__init__.py new file mode 100644 index 00000000..750b5548 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/__init__.py @@ -0,0 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from .model import MODEL_ID # noqa: F401 +from .model import EfficientViT as Model # noqa: F401 diff --git a/qai_hub_models/models/efficientvit_b2_cls/conftest.py b/qai_hub_models/models/efficientvit_b2_cls/conftest.py new file mode 100644 index 00000000..e369ccfc --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.efficientvit_b2_cls import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/efficientvit_b2_cls/demo.py b/qai_hub_models/models/efficientvit_b2_cls/demo.py new file mode 100644 index 00000000..edab627f --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/demo.py @@ -0,0 +1,14 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.imagenet_classifier.demo import imagenet_demo +from qai_hub_models.models.efficientvit_b2_cls.model import MODEL_ID, EfficientViT + + +def main(is_test: bool = False): + imagenet_demo(EfficientViT, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/efficientvit_b2_cls/evaluate.py b/qai_hub_models/models/efficientvit_b2_cls/evaluate.py new file mode 100644 index 00000000..bb9f7019 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.efficientvit_b2_cls import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/efficientvit_b2_cls/export.py b/qai_hub_models/models/efficientvit_b2_cls/export.py new file mode 100644 index 00000000..afacd921 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/export.py @@ -0,0 +1,214 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Optional, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.common import ExportResult, TargetRuntime +from qai_hub_models.models.efficientvit_b2_cls import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + + +def export_model( + device: Optional[str] = None, + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> ExportResult | list[str]: + """ + This function executes the following recipe: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format + 2. Compiles the model to an asset that can be run on device + 3. Profiles the model performance on a real device + 4. Inferences the model on sample inputs + 5. Downloads the model asset to the local directory + 6. Summarizes the results from profiling and inference + + Each of the last 4 steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A struct of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + """ + model_name = "efficientvit_b2_cls" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not device and not chipset: + raise ValueError("Device or Chipset must be provided.") + hub_device = hub.Device( + name=device or "", attributes=f"chipset:{chipset}" if chipset else None + ) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "efficientvit_b2_cls", + "EfficientViT-b2-cls", + device or f"Device (Chipset {chipset})", + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # On-device perf improves with I/O in channel_last format except when using ONNX. + use_channel_last_format = target_runtime != TargetRuntime.ONNX + + # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + # 2. Compiles the model to an asset that can be run on device + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profiles the model performance on a real device + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Inferences the model on sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
def main():
    """CLI entry point: parse the export arguments and forward them to `export_model`."""
    # Silence every warning category for the duration of the export run.
    warnings.filterwarnings("ignore")
    cli_args = export_parser(model_cls=Model).parse_args()
    export_model(**vars(cli_args))
00000000..365144bb --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/info.yaml @@ -0,0 +1,46 @@ +name: EfficientViT-b2-cls +# id must match with the model dir name in qai_hub_models +id: efficientvit_b2_cls +status: public +private_perf_form_factors: + - auto +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +description: EfficientViT is a machine learning model that can classify images from + the Imagenet dataset. It can also be used as a backbone in building more complex + models for specific use cases. +use_case: Image Classification +tags: + - backbone + - real-time +research_paper: https://arxiv.org/abs/2205.14756 +research_paper_title: 'EfficientViT: Multi-Scale Linear Attention for High-Resolution Dense Prediction' +license: https://github.com/CVHub520/efficientvit/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/CVHub520/efficientvit +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 24M + Model size: 200 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2_quantized + - squeezenet1_1 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: true +has_animated_banner: true +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/efficientvit_b2_cls/model.py b/qai_hub_models/models/efficientvit_b2_cls/model.py new file mode 100644 index 00000000..b8975ac4 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/model.py @@ -0,0 +1,43 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
class EfficientViT(ImagenetClassifier):
    """Exportable EfficientViT Image classifier, end-to-end."""

    @classmethod
    def from_pretrained(cls, weights: str | None = None) -> EfficientViT:
        """Load EfficientViT from a weightfile created by the source repository.

        Parameters:
            weights: Checkpoint location forwarded to ``create_cls_model`` as
                ``weight_url``. When omitted (``None`` or empty), the default
                checkpoint ``DEFAULT_WEIGHTS`` is fetched from the asset store.

        Returns:
            The "b2" classification model wrapped in this class, moved to CPU
            and switched to eval mode.
        """
        with SourceAsRoot(
            EFFICIENTVIT_SOURCE_REPOSITORY,
            EFFICIENTVIT_SOURCE_REPO_COMMIT,
            MODEL_ID,
            MODEL_ASSET_VERSION,
        ):
            # Imported inside the SourceAsRoot context — presumably the source
            # repository is only importable while the context is active.
            from efficientvit.cls_model_zoo import create_cls_model

            # Fall back to the published checkpoint only when the caller did
            # not supply weights; explicit weights must never be overwritten.
            if not weights:
                weights = CachedWebModelAsset.from_asset_store(
                    MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS
                ).fetch()

            efficientvit_model = create_cls_model(name="b2", weight_url=weights)
            efficientvit_model.to(torch.device("cpu"))
            efficientvit_model.eval()
            return cls(efficientvit_model)
Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + - SA8775 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - QCS8450 Proxy + - QCS8550 Proxy + - SA8255P Proxy + - SA8295P + - SA8650P Proxy + - SA8775P Proxy +models: +- name: EfficientViT-b2-cls + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 7020.0 + throughput: 142.45014245014244 + estimated_peak_memory_range: + min: 32768 + max: 2791320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 379 + job_id: jpxk7rx15 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7832.0 + throughput: 127.68130745658836 + estimated_peak_memory_range: + min: 16384 + max: 35859248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 398 + job_id: j5q671v4p + job_status: Passed + torchscript_onnx: + inference_time: 6869.0 + throughput: 145.5815984859514 + estimated_peak_memory_range: + min: 20480 + max: 60828368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 370 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 370 + job_id: jg9l32mlg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-11-09T23:13:05Z' + - torchscript_onnx_tflite: + inference_time: 4658.0 + throughput: 214.68441391155002 + estimated_peak_memory_range: + min: 0 + max: 202679328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + 
layers_on_cpu: 0 + total_layers: 379 + job_id: j5mnwk8wp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5206.0 + throughput: 192.0860545524395 + estimated_peak_memory_range: + min: 610304 + max: 37600192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 398 + job_id: jglv08l85 + job_status: Passed + torchscript_onnx: + inference_time: 4440.0 + throughput: 225.22522522522522 + estimated_peak_memory_range: + min: 696320 + max: 251894464 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 370 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 370 + job_id: jp14d1j2p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:13:06Z' + - torchscript_onnx_tflite: + inference_time: 4868.0 + throughput: 205.42317173377157 + estimated_peak_memory_range: + min: 16384 + max: 54755792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 379 + job_id: jgn69qkr5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4531.0 + throughput: 220.70183182520415 + estimated_peak_memory_range: + min: 0 + max: 36723808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 398 + job_id: j56y3mw0p + job_status: Passed + torchscript_onnx: + inference_time: 4721.0 + throughput: 211.81952976064395 + estimated_peak_memory_range: + min: 0 + max: 73661552 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 370 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 370 + job_id: jgdxr43ep + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: 
Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:13:07Z' + - torchscript_onnx_tflite: + inference_time: 7060.0 + throughput: 141.643059490085 + estimated_peak_memory_range: + min: 32768 + max: 1635992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 379 + job_id: jprv4dw9g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7480.0 + throughput: 133.6898395721925 + estimated_peak_memory_range: + min: 655360 + max: 1796712 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 398 + job_id: jp3j476lg + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:12:57Z' + - torchscript_onnx_tflite: + inference_time: 8143.0 + throughput: 122.80486307257767 + estimated_peak_memory_range: + min: 20480 + max: 197576240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 379 + job_id: jgkel082g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8909.0 + throughput: 112.24604332697272 + estimated_peak_memory_range: + min: 622592 + max: 38523216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 398 + job_id: j5wedz765 + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-11-09T23:13:04Z' + - torchscript_onnx_qnn: + inference_time: 7943.0 + throughput: 125.89701624071509 + estimated_peak_memory_range: + min: 602112 + max: 602112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 398 + layers_on_gpu: 0 + layers_on_cpu: 
0 + total_layers: 398 + job_id: jgo21w8xp + job_status: Passed + torchscript_onnx: + inference_time: 7498.0 + throughput: 133.36889837289945 + estimated_peak_memory_range: + min: 53006336 + max: 53006336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 370 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 370 + job_id: j57yjn4l5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-11-09T23:13:08Z' diff --git a/qai_hub_models/models/efficientvit_b2_cls/requirements.txt b/qai_hub_models/models/efficientvit_b2_cls/requirements.txt new file mode 100644 index 00000000..509b76b3 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/requirements.txt @@ -0,0 +1,5 @@ +onnxruntime<1.20.0 +onnxsim<=0.4.36 +segment-anything==1.0 +timm==1.0.3 +torchpack==0.3.1 diff --git a/qai_hub_models/models/efficientvit_b2_cls/test.py b/qai_hub_models/models/efficientvit_b2_cls/test.py new file mode 100644 index 00000000..e04b6f77 --- /dev/null +++ b/qai_hub_models/models/efficientvit_b2_cls/test.py @@ -0,0 +1,38 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import pytest + +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, + run_imagenet_classifier_trace_test, +) +from qai_hub_models.models.efficientvit_b2_cls.demo import main as demo_main +from qai_hub_models.models.efficientvit_b2_cls.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + EfficientViT, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@skip_clone_repo_check +def test_task(): + run_imagenet_classifier_test( + EfficientViT.from_pretrained(), + MODEL_ID, + asset_version=MODEL_ASSET_VERSION, + probability_threshold=0.39, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + run_imagenet_classifier_trace_test(EfficientViT.from_pretrained()) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/efficientvit_l2_cls/README.md b/qai_hub_models/models/efficientvit_l2_cls/README.md new file mode 100644 index 00000000..f43bbef1 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/README.md @@ -0,0 +1,63 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [EfficientViT-l2-cls: Imagenet classifier and general purpose backbone](https://aihub.qualcomm.com/models/efficientvit_l2_cls) + +EfficientViT is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. + +{source_repo_details}This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/efficientvit_l2_cls).
+ +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[efficientvit_l2_cls]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.efficientvit_l2_cls.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.efficientvit_l2_cls.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. + + +## License +* The license for the original implementation of EfficientViT-l2-cls can be found + [here](https://github.com/CVHub520/efficientvit/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf). + + +## References +* [EfficientViT: Multi-Scale Linear Attention for High-Resolution Dense Prediction](https://arxiv.org/abs/2205.14756) +* [Source Model Implementation](https://github.com/CVHub520/efficientvit) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com).
+ + diff --git a/qai_hub_models/models/efficientvit_l2_cls/__init__.py b/qai_hub_models/models/efficientvit_l2_cls/__init__.py new file mode 100644 index 00000000..750b5548 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/__init__.py @@ -0,0 +1,7 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from .model import MODEL_ID # noqa: F401 +from .model import EfficientViT as Model # noqa: F401 diff --git a/qai_hub_models/models/efficientvit_l2_cls/conftest.py b/qai_hub_models/models/efficientvit_l2_cls/conftest.py new file mode 100644 index 00000000..5c331a01 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.efficientvit_l2_cls import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
@pytest.fixture(scope="module", autouse=True)
def cached_from_pretrained():
    """Module-scoped autouse fixture that memoizes `Model.from_pretrained`.

    While the fixture is active, `Model.from_pretrained` is replaced by a
    wrapper that returns a previously built model whenever it is called again
    with arguments that stringify identically. The patch is undone when the
    MonkeyPatch context exits.
    """
    with pytest.MonkeyPatch.context() as patcher:
        loaded_models = {}
        original_loader = Model.from_pretrained

        @skip_clone_repo_check
        def _cached_from_pretrained(*args, **kwargs):
            # The key is the textual form of the call arguments.
            key = f"{args}{kwargs}"
            cached = loaded_models.get(key)
            if cached:
                return cached
            fresh = original_loader(*args, **kwargs)
            loaded_models[key] = fresh
            return fresh

        # Expose the real signature so code that introspects
        # `from_pretrained` sees the original parameters.
        _cached_from_pretrained.__signature__ = inspect.signature(original_loader)

        patcher.setattr(Model, "from_pretrained", _cached_from_pretrained)
        yield patcher
def main():
    """CLI entry point: evaluate the on-device model against the torch model on a dataset."""
    warnings.filterwarnings("ignore")
    args = evaluate_parser(
        model_cls=Model,
        default_split_size=2500,
        supported_datasets=SUPPORTED_DATASETS,
    ).parse_args()
    # The device is always cleared; the target is resolved from the chipset below.
    args.device = None

    if args.hub_model_id is None:
        # No pre-compiled model was named, so compile one from the CLI arguments.
        hub_model = compile_model_from_args(
            MODEL_ID, args, get_model_kwargs(Model, vars(args))
        )
    else:
        hub_model = hub.get_model(args.hub_model_id)

    target_device = get_hub_device(None, args.chipset)
    reference_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args)))
    evaluate_on_dataset(
        hub_model,
        reference_model,
        target_device,
        args.dataset_name,
        args.split_size,
        args.num_samples,
        args.seed,
        args.profile_options,
        args.use_cache,
    )
def export_model(
    device: Optional[str] = None,
    chipset: Optional[str] = None,
    skip_profiling: bool = False,
    skip_inferencing: bool = False,
    skip_downloading: bool = False,
    skip_summary: bool = False,
    output_dir: Optional[str] = None,
    target_runtime: TargetRuntime = TargetRuntime.TFLITE,
    compile_options: str = "",
    profile_options: str = "",
    **additional_model_kwargs,
) -> ExportResult | list[str]:
    """
    This function executes the following recipe:

        1. Instantiates a PyTorch model and converts it to a traced TorchScript format
        2. Compiles the model to an asset that can be run on device
        3. Profiles the model performance on a real device
        4. Inferences the model on sample inputs
        5. Downloads the model asset to the local directory
        6. Summarizes the results from profiling and inference

    Each of the last 4 steps can be optionally skipped using the input options.

    Parameters:
        device: Device for which to export the model.
            Full list of available devices can be found by running `hub.get_devices()`.
            At least one of `device` and `chipset` must be provided.
        chipset: If set, the hub device is requested with the attribute
            `chipset:<chipset>` (in addition to the `device` name, if given).
        skip_profiling: If set, skips profiling of compiled model on real devices.
        skip_inferencing: If set, skips computing on-device outputs from sample data.
        skip_downloading: If set, skips downloading of compiled model.
        skip_summary: If set, skips waiting for and summarizing results
            from profiling and inference.
        output_dir: Directory to store generated assets (e.g. compiled model).
            Defaults to `<cwd>/build/efficientvit_l2_cls`.
        target_runtime: Which on-device runtime to target. Default is TFLite.
        compile_options: Additional options to pass when submitting the compile job.
        profile_options: Additional options to pass when submitting the profile job.
        **additional_model_kwargs: Additional optional kwargs used to customize
            `model_cls.from_pretrained` and `model.get_input_spec`

    Returns:
        A struct of:
            * A CompileJob object containing metadata about the compile job submitted to hub.
            * An InferenceJob containing metadata about the inference job (None if inferencing skipped).
            * A ProfileJob containing metadata about the profile job (None if profiling skipped).
        If Qualcomm AI Hub cannot be accessed, no job is submitted and the
        result of `export_without_hub_access` is returned instead.

    Raises:
        ValueError: If neither `device` nor `chipset` is provided.
        AssertionError: If a summary is requested and the profile or inference
            job did not complete successfully.
    """
    model_name = "efficientvit_l2_cls"
    output_path = Path(output_dir or Path.cwd() / "build" / model_name)
    if not device and not chipset:
        raise ValueError("Device or Chipset must be provided.")
    hub_device = hub.Device(
        name=device or "", attributes=f"chipset:{chipset}" if chipset else None
    )
    if not can_access_qualcomm_ai_hub():
        return export_without_hub_access(
            "efficientvit_l2_cls",
            "EfficientViT-l2-cls",
            device or f"Device (Chipset {chipset})",
            skip_profiling,
            skip_inferencing,
            skip_downloading,
            skip_summary,
            output_path,
            target_runtime,
            compile_options,
            profile_options,
        )

    # On-device perf improves with I/O in channel_last format except when using ONNX.
    use_channel_last_format = target_runtime != TargetRuntime.ONNX

    # 1. Instantiates a PyTorch model and converts it to a traced TorchScript format
    model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs))
    input_spec = model.get_input_spec(
        **get_input_spec_kwargs(model, additional_model_kwargs)
    )

    # Trace the model
    source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec))

    # 2. Compiles the model to an asset that can be run on device
    model_compile_options = model.get_hub_compile_options(
        target_runtime, compile_options, hub_device
    )
    print(f"Optimizing model {model_name} to run on-device")
    submitted_compile_job = hub.submit_compile_job(
        model=source_model,
        input_specs=input_spec,
        device=hub_device,
        name=model_name,
        options=model_compile_options,
    )
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    # 3. Profiles the model performance on a real device
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(f"Profiling model {model_name} on a hosted device.")
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)

    # 4. Inferences the model on sample inputs
    inference_job: Optional[hub.client.InferenceJob] = None
    if not skip_inferencing:
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(
            f"Running inference for {model_name} on a hosted device with example inputs."
        )
        sample_inputs = model.sample_inputs(
            input_spec, use_channel_last_format=use_channel_last_format
        )
        submitted_inference_job = hub.submit_inference_job(
            model=compile_job.get_target_model(),
            inputs=sample_inputs,
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        inference_job = cast(hub.client.InferenceJob, submitted_inference_job)

    # 5. Downloads the model asset to the local directory
    if not skip_downloading:
        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / model_name))

    # 6. Summarizes the results from profiling and inference
    if not skip_summary and not skip_profiling:
        # Explicit check instead of an `assert` statement: asserts are removed
        # under `python -O`, which would also remove the blocking wait().
        # AssertionError is kept so the failure type seen by callers is unchanged.
        if profile_job is None or not profile_job.wait().success:
            raise AssertionError("Profile job did not complete successfully.")
        profile_data: dict[str, Any] = profile_job.download_profile()  # type: ignore
        print_profile_metrics_from_job(profile_job, profile_data)

    if not skip_summary and not skip_inferencing:
        sample_inputs = model.sample_inputs(use_channel_last_format=False)
        torch_out = torch_inference(
            model, sample_inputs, return_channel_last_output=use_channel_last_format
        )
        # Same reasoning as for the profile job: the wait must survive `-O`.
        if inference_job is None or not inference_job.wait().success:
            raise AssertionError("Inference job did not complete successfully.")
        inference_result: hub.client.DatasetEntries = inference_job.download_output_data()  # type: ignore

        print_inference_metrics(
            inference_job, inference_result, torch_out, model.get_output_names()
        )

    if not skip_summary:
        print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device)

    return ExportResult(
        compile_job=compile_job,
        inference_job=inference_job,
        profile_job=profile_job,
    )
00000000..b5d61b08 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/info.yaml @@ -0,0 +1,46 @@ +name: EfficientViT-l2-cls +# id must match with the model dir name in qai_hub_models +id: efficientvit_l2_cls +status: public +private_perf_form_factors: + - auto +headline: Imagenet classifier and general purpose backbone. +domain: Computer Vision +description: EfficientViT is a machine learning model that can classify images from + the Imagenet dataset. It can also be used as a backbone in building more complex + models for specific use cases. +use_case: Image Classification +tags: + - backbone + - real-time +research_paper: https://arxiv.org/abs/2205.14756 +research_paper_title: 'EfficientViT: Multi-Scale Linear Attention for High-Resolution Dense Prediction' +license: https://github.com/CVHub520/efficientvit/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/CVHub520/efficientvit +technical_details: + Model checkpoint: Imagenet + Input resolution: 224x224 + Number of parameters: 64M + Model size: 243 MB +applicable_scenarios: + - Medical Imaging + - Anomaly Detection + - Inventory Management +related_models: + - mobilenet_v2_quantized + - squeezenet1_1 + - densenet121 + - googlenet +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: true +has_animated_banner: true +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: + - imagenet-1k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/efficientvit_l2_cls/model.py b/qai_hub_models/models/efficientvit_l2_cls/model.py new file mode 100644 index 00000000..a5303dec --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/model.py @@ -0,0 +1,43 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
class EfficientViT(ImagenetClassifier):
    """Exportable EfficientViT Image classifier, end-to-end."""

    @classmethod
    def from_pretrained(cls, weights: str | None = None) -> EfficientViT:
        """Load EfficientViT from a weightfile created by the source repository.

        Parameters:
            weights: Checkpoint location forwarded to ``create_cls_model`` as
                ``weight_url``. When omitted (``None`` or empty), the default
                checkpoint ``DEFAULT_WEIGHTS`` is fetched from the asset store.

        Returns:
            The "l2" classification model wrapped in this class, moved to CPU
            and switched to eval mode.
        """
        with SourceAsRoot(
            EFFICIENTVIT_SOURCE_REPOSITORY,
            EFFICIENTVIT_SOURCE_REPO_COMMIT,
            MODEL_ID,
            MODEL_ASSET_VERSION,
        ):
            # Imported inside the SourceAsRoot context — presumably the source
            # repository is only importable while the context is active.
            from efficientvit.cls_model_zoo import create_cls_model

            # Fall back to the published checkpoint only when the caller did
            # not supply weights; explicit weights must never be overwritten.
            if not weights:
                weights = CachedWebModelAsset.from_asset_store(
                    MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS
                ).fetch()

            efficientvit_model = create_cls_model(name="l2", weight_url=weights)
            efficientvit_model.to(torch.device("cpu"))
            efficientvit_model.eval()
            return cls(efficientvit_model)
Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + - SA8775 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - QCS8450 Proxy + - QCS8550 Proxy + - SA8255P Proxy + - SA8295P + - SA8650P Proxy + - SA8775P Proxy +models: +- name: EfficientViT-l2-cls + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 22208.0 + throughput: 45.028818443804035 + estimated_peak_memory_range: + min: 90112 + max: 2712656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 675 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 675 + job_id: jp14d1v2p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23116.0 + throughput: 43.26007959854646 + estimated_peak_memory_range: + min: 45056 + max: 34368896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: jpy14277p + job_status: Passed + torchscript_onnx: + inference_time: 16724.0 + throughput: 59.794307581918204 + estimated_peak_memory_range: + min: 40960 + max: 169429112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 708 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 708 + job_id: jgjv0ywxg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-11-09T23:12:19Z' + - torchscript_onnx_tflite: + inference_time: 15834.0 + throughput: 63.155235569028676 + estimated_peak_memory_range: + min: 40960 + max: 360238976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 675 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 675 + job_id: jgdxr4zep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 16607.0 + throughput: 60.21557174685373 + estimated_peak_memory_range: + min: 643072 + max: 90341536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: jp0z19v65 + job_status: Passed + torchscript_onnx: + inference_time: 11551.0 + throughput: 86.57259111765215 + estimated_peak_memory_range: + min: 630784 + max: 466841136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 708 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 708 + job_id: jpedrxl15 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:12:20Z' + - torchscript_onnx_tflite: + inference_time: 17024.0 + throughput: 58.7406015037594 + estimated_peak_memory_range: + min: 36864 + max: 132503888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 675 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 675 + job_id: j57yjn7l5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17806.0 + throughput: 56.160844659103674 + estimated_peak_memory_range: + min: 614400 + max: 95204912 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: jp8q3r4xp + job_status: Passed + torchscript_onnx: + inference_time: 10538.0 + throughput: 94.89466691971911 + estimated_peak_memory_range: + min: 626688 + max: 176619120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 708 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 708 + job_id: jgz3xy4k5 + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + 
os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:12:21Z' + - torchscript_onnx_tflite: + inference_time: 22017.0 + throughput: 45.4194486078939 + estimated_peak_memory_range: + min: 49152 + max: 2504664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 675 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 675 + job_id: jp4lx49v5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 16745.0 + throughput: 59.719319199761124 + estimated_peak_memory_range: + min: 663552 + max: 2023368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: jgkel092g + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:12:10Z' + - torchscript_onnx_tflite: + inference_time: 20009.0 + throughput: 49.9775101204458 + estimated_peak_memory_range: + min: 57344 + max: 303472400 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 675 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 675 + job_id: jp2k7dv4p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 21486.0 + throughput: 46.541934282788795 + estimated_peak_memory_range: + min: 618496 + max: 65765232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: jpv61m9j5 + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-11-09T23:12:18Z' + - torchscript_onnx_qnn: + inference_time: 17693.0 + throughput: 56.51952749675013 + estimated_peak_memory_range: + min: 602112 + max: 602112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 730 
+ layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 730 + job_id: j5q671m4p + job_status: Passed + torchscript_onnx: + inference_time: 18851.0 + throughput: 53.04758368256326 + estimated_peak_memory_range: + min: 137261056 + max: 137261056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 708 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 708 + job_id: j5wedz465 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-11-09T23:12:22Z' diff --git a/qai_hub_models/models/efficientvit_l2_cls/requirements.txt b/qai_hub_models/models/efficientvit_l2_cls/requirements.txt new file mode 100644 index 00000000..509b76b3 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/requirements.txt @@ -0,0 +1,5 @@ +onnxruntime<1.20.0 +onnxsim<=0.4.36 +segment-anything==1.0 +timm==1.0.3 +torchpack==0.3.1 diff --git a/qai_hub_models/models/efficientvit_l2_cls/test.py b/qai_hub_models/models/efficientvit_l2_cls/test.py new file mode 100644 index 00000000..6c2cf0e6 --- /dev/null +++ b/qai_hub_models/models/efficientvit_l2_cls/test.py @@ -0,0 +1,38 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import pytest + +from qai_hub_models.models._shared.imagenet_classifier.test_utils import ( + run_imagenet_classifier_test, + run_imagenet_classifier_trace_test, +) +from qai_hub_models.models.efficientvit_l2_cls.demo import main as demo_main +from qai_hub_models.models.efficientvit_l2_cls.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + EfficientViT, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + + +@skip_clone_repo_check +def test_task(): + run_imagenet_classifier_test( + EfficientViT.from_pretrained(), + MODEL_ID, + asset_version=MODEL_ASSET_VERSION, + probability_threshold=0.39, + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + run_imagenet_classifier_trace_test(EfficientViT.from_pretrained()) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/esrgan/README.md b/qai_hub_models/models/esrgan/README.md index 80590084..347cb914 100644 --- a/qai_hub_models/models/esrgan/README.md +++ b/qai_hub_models/models/esrgan/README.md @@ -5,8 +5,7 @@ ESRGAN is a machine learning model that upscales an image with minimal loss in quality. -This is based on the implementation of ESRGAN found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/esrgan). 
diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index c0fd9ea0..fa95569a 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 62766.0 - throughput: 15.932192588344009 + inference_time: 65917.0 + throughput: 15.17059332190482 estimated_peak_memory_range: - min: 3166208 - max: 6200536 + min: 3264512 + max: 6475952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp0z2zne5 + job_id: jg9l328vg job_status: Passed torchscript_onnx_qnn: - inference_time: 69706.0 - throughput: 14.345967348578315 + inference_time: 70267.0 + throughput: 14.231431539698578 estimated_peak_memory_range: - min: 102400 - max: 222673136 + min: 118784 + max: 225835160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jpedwdj05 + job_id: j5mnwkowp job_status: Passed torchscript_onnx: - inference_time: 70065.0 - throughput: 14.272461285948761 + inference_time: 71498.0 + throughput: 13.986405214131864 estimated_peak_memory_range: - min: 106496 - max: 44569072 + min: 114688 + max: 44449536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgn6m62m5 + job_id: j56y3mr0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:25:35Z' + timestamp: 
'2024-11-09T23:11:30Z' - torchscript_onnx_tflite: - inference_time: 72782.0 - throughput: 13.73966090516886 + inference_time: 48914.0 + throughput: 20.444044649793515 estimated_peak_memory_range: - min: 3162112 - max: 6211984 + min: 3407872 + max: 693788464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp8qmql8p + job_id: jp14d13lp job_status: Passed torchscript_onnx_qnn: - inference_time: 66876.0 - throughput: 14.953047431066452 + inference_time: 50782.0 + throughput: 19.69201685636643 estimated_peak_memory_range: - min: 118784 - max: 118881872 + min: 143360 + max: 112909824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jgz3j3165 + job_id: jgn69qor5 job_status: Passed torchscript_onnx: - inference_time: 70779.0 - throughput: 14.128484437474393 + inference_time: 52531.0 + throughput: 19.036378519350478 estimated_peak_memory_range: - min: 131072 - max: 44487904 + min: 6639616 + max: 730138624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jprv2vkeg + job_id: jp3j47xlg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:25:36Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:11:31Z' - torchscript_onnx_tflite: - inference_time: 55143.0 - throughput: 18.13466804490144 + inference_time: 37710.0 + throughput: 26.518164942985944 estimated_peak_memory_range: - min: 3289088 - max: 693009024 + min: 3162112 + max: 192149888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgkeqejog + job_id: 
jgdxr40lp job_status: Passed torchscript_onnx_qnn: - inference_time: 55546.0 - throughput: 18.003096532603607 + inference_time: 42470.0 + throughput: 23.54603249352484 estimated_peak_memory_range: - min: 90112 - max: 112513888 + min: 0 + max: 134620080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j5we3ejj5 + job_id: jprv4do9g job_status: Passed torchscript_onnx: - inference_time: 55352.0 - throughput: 18.06619453678277 + inference_time: 43058.0 + throughput: 23.224487900041805 estimated_peak_memory_range: - min: 6426624 - max: 729367360 + min: 6492160 + max: 191734336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jp2k9k8mp + job_id: jgo21woxp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:25:37Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:11:31Z' - torchscript_onnx_tflite: - inference_time: 38328.0 - throughput: 26.09058651638489 + inference_time: 63196.0 + throughput: 15.823786315589594 estimated_peak_memory_range: - min: 3162112 - max: 192160640 + min: 3256320 + max: 6224456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgjv2vj8g + job_id: jg9l32klg job_status: Passed torchscript_onnx_qnn: - inference_time: 42545.0 - throughput: 23.50452462098954 + inference_time: 64385.0 + throughput: 15.531567911780694 estimated_peak_memory_range: - min: 8192 - max: 136788112 + min: 397312 + max: 1576336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: 
j5mn6nrqp - job_status: Passed - torchscript_onnx: - inference_time: 38244.0 - throughput: 26.147892479866123 - estimated_peak_memory_range: - min: 0 - max: 185669728 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1028 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1028 - job_id: jp0z2zye5 + job_id: jp2k7d44p job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:25:39Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:11:22Z' - torchscript_onnx_tflite: - inference_time: 66687.0 - throughput: 14.99542639494954 + inference_time: 67890.0 + throughput: 14.729709824716453 estimated_peak_memory_range: - min: 3211264 - max: 7563696 + min: 3260416 + max: 5546888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: j5q6r6jmp + job_id: jp14d172p job_status: Passed torchscript_onnx_qnn: - inference_time: 61957.0 - throughput: 16.140226285972528 + inference_time: 62524.0 + throughput: 15.993858358390378 estimated_peak_memory_range: min: 417792 - max: 1688384 + max: 1737592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jg9lyl6vg + job_id: jp0z19d65 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:25:27Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:11:24Z' - torchscript_onnx_tflite: - inference_time: 67661.0 - throughput: 14.779562820531769 + inference_time: 71251.0 + throughput: 14.034890738375601 estimated_peak_memory_range: - min: 4329472 - max: 
6748080 + min: 3293184 + max: 6161592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp8qm9ezp + job_id: jgdxr48ep job_status: Passed torchscript_onnx_qnn: - inference_time: 63356.0 - throughput: 15.783824736410127 + inference_time: 63428.0 + throughput: 15.76590780097118 estimated_peak_memory_range: - min: 421888 - max: 1708856 + min: 327680 + max: 1562584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jglv2zye5 + job_id: jp8q3r6xp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:54:25Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:11:25Z' - torchscript_onnx_tflite: - inference_time: 63249.0 - throughput: 15.810526648642666 + inference_time: 65346.0 + throughput: 15.303155510666299 estimated_peak_memory_range: - min: 3252224 - max: 6425520 + min: 3276800 + max: 5470984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgkeqn2yg + job_id: j57yjnkl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 65459.0 - throughput: 15.276738110878565 + inference_time: 63333.0 + throughput: 15.789556787140985 estimated_peak_memory_range: - min: 405504 - max: 1830920 + min: 425984 + max: 1704080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j56yzj8vp + job_id: jgkel0o2g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:54:26Z' + chipset: SA8650P Proxy + 
timestamp: '2024-11-09T23:11:26Z' - torchscript_onnx_tflite: - inference_time: 68231.0 - throughput: 14.65609473699638 + inference_time: 112057.0 + throughput: 8.924029734867077 estimated_peak_memory_range: - min: 3276800 - max: 6042544 + min: 3321856 + max: 177957264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: j5q6rkl7p + job_id: jp4lx4mv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 67728.0 - throughput: 14.764942121426884 + inference_time: 111030.0 + throughput: 9.00657479960371 estimated_peak_memory_range: - min: 438272 - max: 1616816 + min: 372736 + max: 6066064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jp3j13zxg + job_id: j5q671z4p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:54:27Z' + chipset: SA8295P + timestamp: '2024-11-09T23:11:27Z' - torchscript_onnx_tflite: - inference_time: 169262.0 - throughput: 5.908000614432064 + inference_time: 157613.0 + throughput: 6.344654311509838 estimated_peak_memory_range: - min: 3207168 - max: 650723936 + min: 16384 + max: 648108160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgo2n2jdp + job_id: jpxk7r315 job_status: Passed torchscript_onnx_qnn: - inference_time: 142038.0 - throughput: 7.040369478590236 + inference_time: 128296.0 + throughput: 7.794475275924425 estimated_peak_memory_range: - min: 319488 - max: 90572176 + min: 315392 + max: 89276368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jpxk6kv95 + job_id: jglv08o85 job_status: 
Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:25:32Z' + timestamp: '2024-11-09T23:11:28Z' - torchscript_onnx_qnn: - inference_time: 64876.0 - throughput: 15.414020593131513 + inference_time: 64877.0 + throughput: 15.413783004762859 estimated_peak_memory_range: - min: 225280 - max: 225280 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jp14w4rlp + job_id: jpy142q7p job_status: Passed torchscript_onnx: - inference_time: 65573.0 - throughput: 15.250179189605477 + inference_time: 65632.0 + throughput: 15.236470014627011 estimated_peak_memory_range: - min: 39829504 - max: 39829504 + min: 40718336 + max: 40718336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jpy1j1e4p + job_id: jpv61mej5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:25:38Z' + timestamp: '2024-11-09T23:11:33Z' diff --git a/qai_hub_models/models/face_attrib_net/README.md b/qai_hub_models/models/face_attrib_net/README.md index dfdbde69..39f4f7bf 100644 --- a/qai_hub_models/models/face_attrib_net/README.md +++ b/qai_hub_models/models/face_attrib_net/README.md @@ -5,8 +5,7 @@ Facial feature extraction and additional attributes including liveness, eyeclose, mask and glasses detection for face recognition. -This is based on the implementation of FaceAttribNet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/face_attrib_net). diff --git a/qai_hub_models/models/face_attrib_net/info.yaml b/qai_hub_models/models/face_attrib_net/info.yaml index 89f50dc4..ea147b80 100644 --- a/qai_hub_models/models/face_attrib_net/info.yaml +++ b/qai_hub_models/models/face_attrib_net/info.yaml @@ -23,7 +23,7 @@ form_factors: - Phone - Tablet - IoT -has_static_banner: false +has_static_banner: true has_animated_banner: false license_type: bsd-3-clause deploy_license_type: AI Model Hub License diff --git a/qai_hub_models/models/face_attrib_net/perf.yaml b/qai_hub_models/models/face_attrib_net/perf.yaml index 532e8084..d6ada872 100644 --- a/qai_hub_models/models/face_attrib_net/perf.yaml +++ b/qai_hub_models/models/face_attrib_net/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FaceAttribNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 967.0 - throughput: 1034.126163391934 + inference_time: 957.0 + throughput: 1044.932079414838 estimated_peak_memory_range: - min: 24576 - max: 2142632 + min: 28672 + max: 1920040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jgn6m64m5 + job_id: jgjv0yk8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: - min: 0 - max: 79513792 + min: 212992 + max: 8833264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j56yzyx7p + job_id: jpxk7rm95 job_status: Passed 
torchscript_onnx: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1200.0 + throughput: 833.3333333333334 estimated_peak_memory_range: - min: 12288 - max: 30847464 + min: 40960 + max: 381375976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: jgdxqxklp + job_id: jglv087l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:24:39Z' + timestamp: '2024-11-09T23:10:33Z' - torchscript_onnx_tflite: - inference_time: 961.0 - throughput: 1040.5827263267429 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: min: 16384 - max: 2172184 + max: 115467888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jprv2vreg + job_id: jpedrx405 job_status: Passed torchscript_onnx_qnn: - inference_time: 1052.0 - throughput: 950.5703422053232 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: - min: 147456 - max: 79733864 + min: 0 + max: 21376160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp3j1jdzg + job_id: j5mnwk4qp job_status: Passed torchscript_onnx: - inference_time: 1198.0 - throughput: 834.7245409015025 + inference_time: 907.0 + throughput: 1102.5358324145534 estimated_peak_memory_range: - min: 12288 - max: 30844864 + min: 0 + max: 118260704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: j57ylyqr5 + job_id: j56y3mv7p job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android 
manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:24:40Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:10:34Z' - torchscript_onnx_tflite: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 658.0 + throughput: 1519.756838905775 estimated_peak_memory_range: - min: 16384 - max: 114877488 + min: 12288 + max: 62457600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jp2k9k1mp + job_id: jgz3xyv65 job_status: Passed torchscript_onnx_qnn: - inference_time: 861.0 - throughput: 1161.4401858304298 + inference_time: 702.0 + throughput: 1424.5014245014245 estimated_peak_memory_range: - min: 208896 - max: 22563936 + min: 0 + max: 21802080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgo2n2xdp + job_id: jgn69qxm5 job_status: Passed torchscript_onnx: - inference_time: 917.0 - throughput: 1090.5125408942204 + inference_time: 760.0 + throughput: 1315.7894736842106 estimated_peak_memory_range: min: 0 - max: 118700464 + max: 64363584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: jp4ldlzl5 + job_id: jp3j478zg job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:24:41Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:10:35Z' - torchscript_onnx_tflite: - inference_time: 661.0 - throughput: 1512.8593040847202 + inference_time: 947.0 + throughput: 1055.9662090813094 estimated_peak_memory_range: - min: 12288 - max: 62562192 + min: 20480 + max: 1473384 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jglv2vdl5 + job_id: j5wedzmj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 621.0 - throughput: 1610.3059581320451 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 0 - max: 21732160 + min: 229376 + max: 1402160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp14w49lp - job_status: Passed - torchscript_onnx: - inference_time: 853.0 - throughput: 1172.3329425556858 - estimated_peak_memory_range: - min: 0 - max: 64806384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 163 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 163 - job_id: j5mn6njqp + job_id: jprv4d9eg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:24:43Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:10:25Z' - torchscript_onnx_tflite: - inference_time: 965.0 - throughput: 1036.2694300518135 + inference_time: 977.0 + throughput: 1023.5414534288639 estimated_peak_memory_range: - min: 16384 - max: 1834544 + min: 28672 + max: 2356272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jpy1j1l4p + job_id: jg9l329vg job_status: Passed torchscript_onnx_qnn: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 233472 - max: 1395808 + min: 225280 + max: 1538840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpv6r68m5 + job_id: jpy142n4p 
job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:24:31Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:10:27Z' - torchscript_onnx_tflite: - inference_time: 967.0 - throughput: 1034.126163391934 + inference_time: 978.0 + throughput: 1022.4948875255624 estimated_peak_memory_range: - min: 0 - max: 2154960 + min: 20480 + max: 1948176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: j57ylw4q5 + job_id: jp14d1qlp job_status: Passed torchscript_onnx_qnn: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 221184 - max: 1533408 + min: 229376 + max: 1505664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j5mn62myp + job_id: jp0z19ke5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:53:51Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:10:28Z' - torchscript_onnx_tflite: - inference_time: 965.0 - throughput: 1036.2694300518135 + inference_time: 987.0 + throughput: 1013.1712259371834 estimated_peak_memory_range: - min: 28672 - max: 2302920 + min: 24576 + max: 2222704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jp4ldo1q5 + job_id: jgdxr47lp job_status: Passed torchscript_onnx_qnn: - inference_time: 1015.0 - throughput: 985.2216748768473 + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 
229376 - max: 1784616 + min: 237568 + max: 2050560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgn6myzv5 + job_id: jp8q3r88p job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:53:53Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:10:29Z' - torchscript_onnx_tflite: - inference_time: 966.0 - throughput: 1035.1966873706003 + inference_time: 1663.0 + throughput: 601.3229104028864 estimated_peak_memory_range: - min: 32768 - max: 2345928 + min: 16384 + max: 54636128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jpxk6j4j5 + job_id: j57yjn6r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 998.0 - throughput: 1002.0040080160321 + inference_time: 1742.0 + throughput: 574.052812858783 estimated_peak_memory_range: - min: 245760 - max: 1523144 + min: 0 + max: 5951616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jprv2qlvg + job_id: jgkel0dog job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:53:54Z' + chipset: SA8295P + timestamp: '2024-11-09T23:10:30Z' - torchscript_onnx_tflite: - inference_time: 1257.0 - throughput: 795.5449482895783 + inference_time: 1258.0 + throughput: 794.912559618442 estimated_peak_memory_range: min: 16384 - max: 106075856 + max: 106889680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: j5q6r6nmp + job_id: jp4lx48l5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 1334.0 - throughput: 749.6251874062968 + inference_time: 1356.0 + throughput: 737.4631268436578 estimated_peak_memory_range: - min: 204800 - max: 19409584 + min: 208896 + max: 22066560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jg9lylrvg + job_id: j5q671wmp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:24:37Z' + timestamp: '2024-11-09T23:10:32Z' - torchscript_onnx_qnn: - inference_time: 1144.0 - throughput: 874.1258741258741 + inference_time: 1140.0 + throughput: 877.1929824561404 estimated_peak_memory_range: - min: 491520 - max: 491520 + min: 380928 + max: 380928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgjv2v98g + job_id: jp2k7djmp job_status: Passed torchscript_onnx: - inference_time: 1286.0 - throughput: 777.6049766718507 + inference_time: 1213.0 + throughput: 824.4023083264633 estimated_peak_memory_range: - min: 28254208 - max: 28254208 + min: 28073984 + max: 28073984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: jpxk6kw95 + job_id: jgo21wmdp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:24:42Z' + timestamp: '2024-11-09T23:10:36Z' diff --git a/qai_hub_models/models/face_det_lite/README.md b/qai_hub_models/models/face_det_lite/README.md index 6bf27217..ed98fc9d 100644 --- a/qai_hub_models/models/face_det_lite/README.md +++ b/qai_hub_models/models/face_det_lite/README.md @@ -5,8 +5,7 @@ face_det_lite is a machine learning 
model that detect face in the images -This is based on the implementation of Lightweight-Face-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/face_det_lite). diff --git a/qai_hub_models/models/face_det_lite/info.yaml b/qai_hub_models/models/face_det_lite/info.yaml index b0c50062..a50c8409 100644 --- a/qai_hub_models/models/face_det_lite/info.yaml +++ b/qai_hub_models/models/face_det_lite/info.yaml @@ -24,7 +24,7 @@ form_factors: - Phone - Tablet - IoT -has_static_banner: false +has_static_banner: true has_animated_banner: false license_type: bsd-3-clause deploy_license_type: AI Model Hub License diff --git a/qai_hub_models/models/face_det_lite/perf.yaml b/qai_hub_models/models/face_det_lite/perf.yaml index 186a0f44..605fabb6 100644 --- a/qai_hub_models/models/face_det_lite/perf.yaml +++ b/qai_hub_models/models/face_det_lite/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Lightweight-Face-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 1018.0 - throughput: 982.3182711198428 + inference_time: 1022.0 + throughput: 978.4735812133073 estimated_peak_memory_range: - min: 16384 - max: 1345272 + min: 12288 + max: 3516528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j57ylynq5 + job_id: jp8q3r78p job_status: Passed torchscript_onnx_qnn: - inference_time: 1097.0 - throughput: 911.5770282588878 + inference_time: 1103.0 + 
throughput: 906.6183136899365 estimated_peak_memory_range: - min: 1261568 - max: 5904448 + min: 2113536 + max: 7250344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jp8qmqkzp + job_id: jpedrxv05 job_status: Passed torchscript_onnx: - inference_time: 2211.0 - throughput: 452.2840343735866 + inference_time: 2237.0 + throughput: 447.02726866338844 estimated_peak_memory_range: - min: 999424 - max: 3241064 + min: 372736 + max: 2637752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgz3j30z5 + job_id: jgn69q9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:23:10Z' + timestamp: '2024-11-09T23:09:06Z' - torchscript_onnx_tflite: - inference_time: 1020.0 - throughput: 980.3921568627451 + inference_time: 682.0 + throughput: 1466.275659824047 estimated_peak_memory_range: min: 12288 - max: 1528040 + max: 33085152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp4ldl4q5 + job_id: jgkel0yog job_status: Passed torchscript_onnx_qnn: - inference_time: 1094.0 - throughput: 914.0767824497258 + inference_time: 735.0 + throughput: 1360.544217687075 estimated_peak_memory_range: - min: 1245184 - max: 5631376 + min: 0 + max: 13105184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgkeqekyg + job_id: jgz3xy765 job_status: Passed torchscript_onnx: - inference_time: 2236.0 - throughput: 447.2271914132379 + inference_time: 1375.0 + throughput: 727.2727272727273 estimated_peak_memory_range: - min: 1245184 - max: 3670056 + min: 0 + max: 42034112 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we3erz5 + job_id: jprv4d4eg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:23:11Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:09:07Z' - torchscript_onnx_tflite: - inference_time: 734.0 - throughput: 1362.3978201634877 + inference_time: 750.0 + throughput: 1333.3333333333333 estimated_peak_memory_range: - min: 12288 - max: 32948976 + min: 8192 + max: 20267504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jpxk6krj5 + job_id: j5q6712mp job_status: Passed torchscript_onnx_qnn: - inference_time: 796.0 - throughput: 1256.2814070351758 + inference_time: 756.0 + throughput: 1322.7513227513227 estimated_peak_memory_range: min: 0 - max: 14668352 + max: 10568384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j5q6r6d7p + job_id: j5wedz9j5 job_status: Passed torchscript_onnx: - inference_time: 1450.0 - throughput: 689.6551724137931 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: min: 0 - max: 42307360 + max: 24251616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lylqqg + job_id: jp2k7d7mp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:23:13Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:09:08Z' - 
torchscript_onnx_tflite: - inference_time: 710.0 - throughput: 1408.4507042253522 + inference_time: 1053.0 + throughput: 949.667616334283 estimated_peak_memory_range: - min: 8192 - max: 19893680 + min: 16384 + max: 1404024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp0z2zx25 + job_id: jglv08kl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 1074.0 + throughput: 931.0986964618249 estimated_peak_memory_range: - min: 0 - max: 10839648 + min: 1261568 + max: 2610728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jpedwdn75 - job_status: Passed - torchscript_onnx: - inference_time: 1480.0 - throughput: 675.6756756756756 - estimated_peak_memory_range: - min: 0 - max: 24473536 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 126 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 126 - job_id: jgdxqxmkp + job_id: jg9l324vg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:23:15Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:08:58Z' - torchscript_onnx_tflite: - inference_time: 1015.0 - throughput: 985.2216748768473 + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: min: 12288 - max: 6938608 + max: 1490208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j5mn6nkyp + job_id: j56y3m17p job_status: Passed torchscript_onnx_qnn: - inference_time: 1071.0 - throughput: 933.7068160597572 + inference_time: 1080.0 + throughput: 925.925925925926 
estimated_peak_memory_range: - min: 1245184 - max: 2496048 + min: 1257472 + max: 2634096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jglv2vqe5 + job_id: jgdxr4vlp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:23:02Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:09:00Z' - torchscript_onnx_tflite: - inference_time: 1021.0 - throughput: 979.4319294809011 + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: - min: 28672 - max: 1615984 + min: 24576 + max: 1361288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgn6mynv5 + job_id: jp3j47mzg job_status: Passed torchscript_onnx_qnn: inference_time: 1079.0 throughput: 926.7840593141798 estimated_peak_memory_range: - min: 24576 - max: 1473144 + min: 53248 + max: 1192104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jpy1jwxrp + job_id: j57yjnjr5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:53:08Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:09:01Z' - torchscript_onnx_tflite: inference_time: 1019.0 throughput: 981.3542688910696 estimated_peak_memory_range: min: 12288 - max: 41982232 + max: 1499120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jprv2q0vg + job_id: jgo21wvdp job_status: Passed torchscript_onnx_qnn: - inference_time: 
1087.0 - throughput: 919.9632014719411 + inference_time: 1077.0 + throughput: 928.5051067780872 estimated_peak_memory_range: - min: 32768 - max: 1413552 + min: 28672 + max: 1211224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jp0z2qj25 + job_id: jp4lx4xl5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:53:09Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:09:02Z' - torchscript_onnx_tflite: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1769.0 + throughput: 565.2911249293386 estimated_peak_memory_range: - min: 20480 - max: 1577856 + min: 12288 + max: 18876432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp2k96wxp + job_id: jpv61mwm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1074.0 - throughput: 931.0986964618249 + inference_time: 1984.0 + throughput: 504.03225806451616 estimated_peak_memory_range: - min: 1261568 - max: 2383264 + min: 794624 + max: 6709552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jp8qm9xzp + job_id: jpxk7r795 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:53:11Z' + chipset: SA8295P + timestamp: '2024-11-09T23:09:03Z' - torchscript_onnx_tflite: inference_time: 1277.0 throughput: 783.0853563038371 estimated_peak_memory_range: min: 12288 - max: 33898288 + max: 32732624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 84 - job_id: jpy1j1yrp + job_id: jgjv0yl8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1353.0 - throughput: 739.0983000739099 + inference_time: 1354.0 + throughput: 738.5524372230428 estimated_peak_memory_range: min: 1245184 - max: 17673584 + max: 16039200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgjv2v87g + job_id: j5mnwkwqp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:23:08Z' + timestamp: '2024-11-09T23:09:05Z' - torchscript_onnx_qnn: - inference_time: 1258.0 - throughput: 794.912559618442 + inference_time: 1284.0 + throughput: 778.816199376947 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j56yzy0vp + job_id: jp14d18lp job_status: Passed torchscript_onnx: - inference_time: 2065.0 - throughput: 484.26150121065376 + inference_time: 2044.0 + throughput: 489.23679060665364 estimated_peak_memory_range: - min: 2437120 - max: 2437120 + min: 2629632 + max: 2629632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14w4mkp + job_id: jpy14244p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:23:14Z' + timestamp: '2024-11-09T23:09:09Z' diff --git a/qai_hub_models/models/facemap_3dmm/README.md b/qai_hub_models/models/facemap_3dmm/README.md index d320f639..4eee34ee 100644 --- a/qai_hub_models/models/facemap_3dmm/README.md +++ b/qai_hub_models/models/facemap_3dmm/README.md @@ -5,8 +5,7 @@ Facial landmark is a deep learning model that can predict 68 
landmarks from a single image. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Facial-Landmark-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/facemap_3dmm). diff --git a/qai_hub_models/models/facemap_3dmm/perf.yaml b/qai_hub_models/models/facemap_3dmm/perf.yaml index a974d5cf..c6859360 100644 --- a/qai_hub_models/models/facemap_3dmm/perf.yaml +++ b/qai_hub_models/models/facemap_3dmm/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Facial-Landmark-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 351.0 - throughput: 2849.002849002849 + inference_time: 353.0 + throughput: 2832.8611898016998 estimated_peak_memory_range: min: 28672 - max: 1451336 + max: 3214048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jgke3lkvg + job_id: jp0z19l25 job_status: Passed torchscript_onnx_qnn: - inference_time: 361.0 - throughput: 2770.083102493075 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: - min: 16384 - max: 169816336 + min: 0 + max: 37094816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: j5q637dep + job_id: jgjv0ye7g job_status: Passed torchscript_onnx: - inference_time: 485.0 - throughput: 2061.855670103093 + 
inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: min: 12288 - max: 12402456 + max: 12417912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 60 - job_id: jg9lw3qmg + job_id: jgdxr4olp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:46:26Z' + timestamp: '2024-11-09T23:08:23Z' - torchscript_onnx_tflite: - inference_time: 349.0 - throughput: 2865.3295128939826 + inference_time: 270.0 + throughput: 3703.703703703704 estimated_peak_memory_range: min: 16384 - max: 5894736 + max: 25483184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jg9lyloqg + job_id: jp8q3rzzp job_status: Passed torchscript_onnx_qnn: - inference_time: 377.0 - throughput: 2652.5198938992044 + inference_time: 273.0 + throughput: 3663.003663003663 estimated_peak_memory_range: - min: 221184 - max: 1846920 + min: 212992 + max: 11650768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jglv30q25 + job_id: jpedrxk75 job_status: Passed torchscript_onnx: - inference_time: 486.0 - throughput: 2057.61316872428 + inference_time: 381.0 + throughput: 2624.6719160104985 estimated_peak_memory_range: - min: 36864 - max: 86832984 + min: 0 + max: 26440800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 60 - job_id: jpv6r6m75 + job_id: j57yjndr5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:46:18Z' + chipset: 
Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:08:24Z' - torchscript_onnx_tflite: - inference_time: 276.0 - throughput: 3623.1884057971015 + inference_time: 271.0 + throughput: 3690.036900369004 estimated_peak_memory_range: - min: 16384 - max: 25926240 + min: 12288 + max: 15849712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jp14w4okp + job_id: jgkel03yg job_status: Passed torchscript_onnx_qnn: - inference_time: 285.0 - throughput: 3508.7719298245615 + inference_time: 293.0 + throughput: 3412.9692832764504 estimated_peak_memory_range: min: 0 - max: 8662448 + max: 8953792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: j56yn30np + job_id: jgz3xyrz5 job_status: Passed torchscript_onnx: - inference_time: 386.0 - throughput: 2590.6735751295337 + inference_time: 344.0 + throughput: 2906.9767441860463 estimated_peak_memory_range: min: 0 - max: 27701392 + max: 15993664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 60 - job_id: jgjv2vy7g + job_id: jp4lx4wl5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:46:19Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:08:25Z' - torchscript_onnx_tflite: - inference_time: 258.0 - throughput: 3875.968992248062 + inference_time: 351.0 + throughput: 2849.002849002849 estimated_peak_memory_range: min: 12288 - max: 15755600 + max: 1657608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jgn6m6qv5 + job_id: j5q67137p job_status: Passed 
torchscript_onnx_qnn: - inference_time: 280.0 - throughput: 3571.4285714285716 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 0 - max: 8718576 + min: 225280 + max: 1570000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jp3je4rmg - job_status: Passed - torchscript_onnx: - inference_time: 330.0 - throughput: 3030.3030303030305 - estimated_peak_memory_range: - min: 0 - max: 15910368 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 59 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 60 - job_id: jgz3j3yz5 + job_id: j5wedzqz5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:46:20Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:08:16Z' - torchscript_onnx_tflite: - inference_time: 354.0 - throughput: 2824.858757062147 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 32768 - max: 1390936 + min: 24576 + max: 14610416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jgdxqx6kp + job_id: jglv083e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 351.0 - throughput: 2849.002849002849 + inference_time: 357.0 + throughput: 2801.1204481792715 estimated_peak_memory_range: - min: 237568 - max: 1410048 + min: 229376 + max: 1819056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jgo23191p + job_id: jp14d1ekp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto 
os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:46:21Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:08:18Z' - torchscript_onnx_tflite: - inference_time: 346.0 - throughput: 2890.173410404624 + inference_time: 353.0 + throughput: 2832.8611898016998 estimated_peak_memory_range: - min: 16384 - max: 3992504 + min: 28672 + max: 1851152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jgz3j9nz5 + job_id: j56y3mnvp job_status: Passed torchscript_onnx_qnn: - inference_time: 356.0 - throughput: 2808.9887640449438 + inference_time: 355.0 + throughput: 2816.9014084507044 estimated_peak_memory_range: - min: 233472 - max: 1645856 + min: 266240 + max: 1462448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jp14wl6kp + job_id: jgdxr4okp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:52:50Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:08:19Z' - torchscript_onnx_tflite: inference_time: 342.0 throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 24576 - max: 1393496 + min: 32768 + max: 4423296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: j5we3v4z5 + job_id: jp3j47exg job_status: Passed torchscript_onnx_qnn: - inference_time: 351.0 - throughput: 2849.002849002849 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 229376 - max: 1481912 + min: 20480 + max: 1226368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jgdxq92kp + job_id: 
j5wedzqj5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:52:51Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:08:20Z' - torchscript_onnx_tflite: - inference_time: 354.0 - throughput: 2824.858757062147 + inference_time: 725.0 + throughput: 1379.3103448275863 estimated_peak_memory_range: - min: 20480 - max: 178302304 + min: 16384 + max: 14428176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: jg9ly1dqg + job_id: jgo21w34p job_status: Passed torchscript_onnx_qnn: - inference_time: 352.0 - throughput: 2840.909090909091 + inference_time: 978.0 + throughput: 1022.4948875255624 estimated_peak_memory_range: - min: 229376 - max: 1612400 + min: 192512 + max: 6097440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: j57ylw9q5 + job_id: jg9l32wvg job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:52:52Z' + chipset: SA8295P + timestamp: '2024-11-09T23:08:21Z' - torchscript_onnx_tflite: - inference_time: 452.0 - throughput: 2212.3893805309735 + inference_time: 447.0 + throughput: 2237.136465324385 estimated_peak_memory_range: min: 16384 - max: 26108880 + max: 25913280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 47 - job_id: j5mn6n9yp + job_id: jpv61mv75 job_status: Passed torchscript_onnx_qnn: - inference_time: 470.0 - throughput: 2127.659574468085 + inference_time: 463.0 + throughput: 2159.827213822894 estimated_peak_memory_range: - min: 40960 - max: 15004352 + min: 0 + 
max: 13746304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: j5weqdr45 + job_id: jp14d1elp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:46:26Z' + timestamp: '2024-11-09T23:08:22Z' - torchscript_onnx_qnn: - inference_time: 418.0 - throughput: 2392.3444976076553 + inference_time: 423.0 + throughput: 2364.066193853428 estimated_peak_memory_range: - min: 552960 - max: 552960 + min: 450560 + max: 450560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 60 - job_id: jpv6v1nz5 + job_id: jg9l32wqg job_status: Passed torchscript_onnx: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 504.0 + throughput: 1984.126984126984 estimated_peak_memory_range: - min: 12288000 - max: 12288000 + min: 12181504 + max: 12181504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 60 - job_id: jpedwdx75 + job_id: jpxk7r195 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:46:22Z' + timestamp: '2024-11-09T23:08:26Z' diff --git a/qai_hub_models/models/fastsam_s/README.md b/qai_hub_models/models/fastsam_s/README.md index 882d25fb..a298c883 100644 --- a/qai_hub_models/models/fastsam_s/README.md +++ b/qai_hub_models/models/fastsam_s/README.md @@ -5,8 +5,7 @@ The Fast Segment Anything Model (FastSAM) is a novel, real-time CNN-based solution for the Segment Anything task. This task is designed to segment any object within an image based on various possible user interaction prompts. 
The model performs competitively despite significantly reduced computation, making it a practical choice for a variety of vision tasks. -This is based on the implementation of FastSam-S found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fastsam_s). diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 5756f2b1..9f88368d 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FastSam-S performance_metrics: - torchscript_onnx_qnn: - inference_time: 8056.0 - throughput: 124.13108242303873 + inference_time: 8107.0 + throughput: 123.35019119279634 estimated_peak_memory_range: - min: 4939776 - max: 20125144 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 286 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 286 - job_id: jpxk6klj5 - job_status: Passed - torchscript_onnx: - inference_time: 9711.0 - throughput: 102.97600659046442 - estimated_peak_memory_range: - min: 3297280 - max: 25448520 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 289 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 289 - job_id: jglv2vwe5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:21:47Z' - - torchscript_onnx_qnn: - 
inference_time: 8065.0 - throughput: 123.99256044637322 - estimated_peak_memory_range: - min: 6328320 - max: 21088408 + min: 4956160 + max: 20134848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j5mn6n0yp + job_id: jp3j471xg job_status: Passed torchscript_onnx: - inference_time: 9710.0 - throughput: 102.98661174047373 + inference_time: 9468.0 + throughput: 105.61892691170257 estimated_peak_memory_range: - min: 3944448 - max: 26264016 + min: 12288 + max: 21801960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j56yzyovp + job_id: j57yjnlq5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:21:48Z' + timestamp: '2024-11-09T23:07:42Z' - torchscript_onnx_qnn: - inference_time: 6548.0 - throughput: 152.71838729383018 + inference_time: 6167.0 + throughput: 162.15339711366954 estimated_peak_memory_range: - min: 4931584 - max: 40889296 + min: 4952064 + max: 42494272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgn6m61v5 + job_id: jgo21wn4p job_status: Passed torchscript_onnx: - inference_time: 7252.0 - throughput: 137.89299503585218 + inference_time: 6778.0 + throughput: 147.5361463558572 estimated_peak_memory_range: - min: 14307328 - max: 97824800 + min: 16203776 + max: 102268592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp3j1joxg + job_id: jp4lx4dq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-10-27T00:21:49Z' + timestamp: '2024-11-09T23:07:43Z' - torchscript_onnx_qnn: - inference_time: 5478.0 - throughput: 182.54837531945967 + inference_time: 5494.0 + throughput: 182.01674554058974 estimated_peak_memory_range: min: 4927488 - max: 37751184 + max: 37236880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j5q6r647p + job_id: jpv61mr75 job_status: Passed torchscript_onnx: - inference_time: 6390.0 - throughput: 156.49452269170578 + inference_time: 6374.0 + throughput: 156.88735487919675 estimated_peak_memory_range: - min: 17694720 - max: 65843808 + min: 14774272 + max: 62162080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpv6r6275 + job_id: jpxk7r6j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:21:52Z' + timestamp: '2024-11-09T23:07:44Z' - torchscript_onnx_qnn: - inference_time: 7636.0 - throughput: 130.95861707700368 + inference_time: 7625.0 + throughput: 131.14754098360655 estimated_peak_memory_range: - min: 4952064 - max: 6261824 + min: 5009408 + max: 7469864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jprv2vxvg + job_id: jgjv0y27g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:21:40Z' + timestamp: '2024-11-09T23:07:34Z' - torchscript_onnx_qnn: - inference_time: 7714.0 - throughput: 129.63443090484833 + inference_time: 7601.0 + throughput: 131.56163662675965 estimated_peak_memory_range: - min: 4972544 - max: 9013136 + min: 4943872 + max: 9337528 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j56yzjwvp + job_id: jgz3xyjz5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:52:31Z' + timestamp: '2024-11-09T23:07:37Z' - torchscript_onnx_qnn: - inference_time: 7796.0 - throughput: 128.27090815802975 + inference_time: 7667.0 + throughput: 130.4291117777488 estimated_peak_memory_range: - min: 4972544 - max: 11977648 + min: 7077888 + max: 9569608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp3j136xg + job_id: j5wedz3z5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:52:32Z' + timestamp: '2024-11-09T23:07:38Z' - torchscript_onnx_qnn: - inference_time: 7692.0 - throughput: 130.00520020800832 + inference_time: 7686.0 + throughput: 130.10668748373666 estimated_peak_memory_range: - min: 4960256 - max: 10295712 + min: 6209536 + max: 10979160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgo2n084p + job_id: jg9l32yqg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:52:33Z' + timestamp: '2024-11-09T23:07:39Z' - torchscript_onnx_qnn: - inference_time: 13510.0 - throughput: 74.01924500370096 + inference_time: 14075.0 + throughput: 71.04795737122558 estimated_peak_memory_range: - min: 4956160 - max: 39488352 + min: 49152 + max: 5933920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 286 + job_id: jp14d1wkp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:07:40Z' + - torchscript_onnx_qnn: + inference_time: 13593.0 + throughput: 73.56727727506805 + estimated_peak_memory_range: + min: 4931584 + max: 42899936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgkeqe6yg + job_id: jgdxr4qkp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,10 +293,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:21:45Z' + timestamp: '2024-11-09T23:07:41Z' - torchscript_onnx_qnn: - inference_time: 8289.0 - throughput: 120.64181445288938 + inference_time: 8254.0 + throughput: 121.153380179307 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -320,14 +307,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp2k9koxp + job_id: jpedrxw75 job_status: Passed torchscript_onnx: - inference_time: 9867.0 - throughput: 101.34792743488396 + inference_time: 9924.0 + throughput: 100.76582023377671 estimated_peak_memory_range: - min: 21434368 - max: 21434368 + min: 22401024 + max: 22401024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +322,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgo2n2d4p + job_id: j5mnwk6yp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +331,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:21:50Z' + timestamp: '2024-11-09T23:07:45Z' diff --git a/qai_hub_models/models/fastsam_x/README.md b/qai_hub_models/models/fastsam_x/README.md index 0ebcaa2c..bbb571e5 100644 --- a/qai_hub_models/models/fastsam_x/README.md +++ b/qai_hub_models/models/fastsam_x/README.md @@ -5,8 +5,7 
@@ The Fast Segment Anything Model (FastSAM) is a novel, real-time CNN-based solution for the Segment Anything task. This task is designed to segment any object within an image based on various possible user interaction prompts. The model performs competitively despite significantly reduced computation, making it a practical choice for a variety of vision tasks. -This is based on the implementation of FastSam-X found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fastsam_x). diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index e246bd1a..279ddb9d 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FastSam-X performance_metrics: - torchscript_onnx_qnn: - inference_time: 45282.0 - throughput: 22.083830219513274 + inference_time: 46347.0 + throughput: 21.576369560057824 estimated_peak_memory_range: - min: 5001216 - max: 22097784 + min: 3829760 + max: 22698384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: j5mn6nm7p + job_id: j5q671e7p job_status: Passed torchscript_onnx: - inference_time: 49848.0 - throughput: 20.060985395602632 + inference_time: 48131.0 + throughput: 20.776630446074254 estimated_peak_memory_range: - min: 90112 - max: 165657856 + min: 81920 + max: 1321659768 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j56yzy8np + job_id: jg9l32jqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:21:04Z' + timestamp: '2024-11-09T23:06:58Z' - torchscript_onnx_qnn: - inference_time: 45234.0 - throughput: 22.107264447097315 + inference_time: 34644.0 + throughput: 28.865027133125505 estimated_peak_memory_range: - min: 4943872 - max: 21814984 + min: 4972544 + max: 67135232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,52 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jgn6m6nj5 + job_id: jglv086e5 job_status: Passed torchscript_onnx: - inference_time: 50079.0 - throughput: 19.968449849238205 + inference_time: 34945.0 + throughput: 28.616397195593073 estimated_peak_memory_range: - min: 90112 - max: 164924952 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 421 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 421 - job_id: jp3j1jzmg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:21:05Z' - - torchscript_onnx_qnn: - inference_time: 53703.0 - throughput: 18.620933653613392 - estimated_peak_memory_range: - min: 4931584 - max: 64741232 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 418 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 418 - job_id: jprv2v0kg - job_status: Passed - torchscript_onnx: - inference_time: 55052.0 - throughput: 18.164644336263898 - estimated_peak_memory_range: - min: 9945088 - max: 174188320 + min: 0 + max: 164210224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: 
jgo2n2l1p + job_id: jp14d1ykp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:21:06Z' + timestamp: '2024-11-09T23:06:59Z' - torchscript_onnx_qnn: - inference_time: 30812.0 - throughput: 32.454887706088535 + inference_time: 27041.0 + throughput: 36.98088088458267 estimated_peak_memory_range: - min: 4927488 - max: 65066240 + min: 4968448 + max: 66339552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jglv2vy25 + job_id: j56y3mevp job_status: Passed torchscript_onnx: - inference_time: 31547.0 - throughput: 31.6987352204647 + inference_time: 31760.0 + throughput: 31.486146095717885 estimated_peak_memory_range: min: 0 - max: 79444224 + max: 79575520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jgjv2vr1g + job_id: jgdxr4ekp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:21:08Z' + timestamp: '2024-11-09T23:07:00Z' - torchscript_onnx_qnn: - inference_time: 42763.0 - throughput: 23.384701728129457 + inference_time: 43152.0 + throughput: 23.17389692250649 estimated_peak_memory_range: - min: 5033984 - max: 6242896 + min: 5042176 + max: 6505232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jp2k9kw6p + job_id: jp3j47vxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:20:55Z' + timestamp: '2024-11-09T23:06:49Z' - torchscript_onnx_qnn: - inference_time: 43483.0 - 
throughput: 22.997493273233218 + inference_time: 43517.0 + throughput: 22.979525243008478 estimated_peak_memory_range: min: 5107712 - max: 8548544 + max: 6342336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jpxk6jdj5 + job_id: jpv61m075 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:52:09Z' + timestamp: '2024-11-09T23:06:52Z' - torchscript_onnx_qnn: - inference_time: 42892.0 - throughput: 23.314370978271008 + inference_time: 43704.0 + throughput: 22.881200805418267 estimated_peak_memory_range: - min: 6692864 - max: 7995664 + min: 5074944 + max: 8481000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: j5mn62dyp + job_id: jgjv0yz7g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:52:10Z' + timestamp: '2024-11-09T23:06:53Z' - torchscript_onnx_qnn: - inference_time: 43916.0 - throughput: 22.770744147918755 + inference_time: 43486.0 + throughput: 22.995906728602307 estimated_peak_memory_range: - min: 5525504 - max: 12188336 + min: 5070848 + max: 6350336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jgn6mykv5 + job_id: jpedrxe75 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:52:11Z' + timestamp: '2024-11-09T23:06:54Z' + - torchscript_onnx_qnn: + inference_time: 83059.0 + throughput: 12.039634476697287 + estimated_peak_memory_range: + min: 143360 + max: 5766416 + primary_compute_unit: 
NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jgz3xyoz5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:06:56Z' - torchscript_onnx_qnn: - inference_time: 97974.0 - throughput: 10.206789556412925 + inference_time: 88070.0 + throughput: 11.354604292040422 estimated_peak_memory_range: - min: 3842048 - max: 64410464 + min: 4001792 + max: 67905728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: j5q6r6lep + job_id: j5wedz2z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,10 +293,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:21:01Z' + timestamp: '2024-11-09T23:06:57Z' - torchscript_onnx_qnn: - inference_time: 44423.0 - throughput: 22.51086149066925 + inference_time: 44416.0 + throughput: 22.514409221902017 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -320,14 +307,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jpy1j1x0p + job_id: jgo21wk4p job_status: Passed torchscript_onnx: - inference_time: 49504.0 - throughput: 20.20038784744667 + inference_time: 49553.0 + throughput: 20.180412891247755 estimated_peak_memory_range: - min: 146202624 - max: 146202624 + min: 146169856 + max: 146169856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +322,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jpv6r6lz5 + job_id: j57yjn0q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +331,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:21:07Z' + timestamp: '2024-11-09T23:07:01Z' diff --git 
a/qai_hub_models/models/fcn_resnet50/README.md b/qai_hub_models/models/fcn_resnet50/README.md index 862ed945..4df80006 100644 --- a/qai_hub_models/models/fcn_resnet50/README.md +++ b/qai_hub_models/models/fcn_resnet50/README.md @@ -5,8 +5,7 @@ FCN_ResNet50 is a machine learning model that can segment images from the COCO dataset. It uses ResNet50 as a backbone. -This is based on the implementation of FCN-ResNet50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fcn_resnet50). diff --git a/qai_hub_models/models/fcn_resnet50/app.py b/qai_hub_models/models/fcn_resnet50/app.py index a47e578e..ac5d6111 100644 --- a/qai_hub_models/models/fcn_resnet50/app.py +++ b/qai_hub_models/models/fcn_resnet50/app.py @@ -67,7 +67,7 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: input_tensor = preprocess_image(image) output = self.model(input_tensor) output = output[0] - predictions = output.argmax(0).byte().cpu().numpy() + predictions = output.cpu().numpy() if raw_output: return predictions diff --git a/qai_hub_models/models/fcn_resnet50/evaluate.py b/qai_hub_models/models/fcn_resnet50/evaluate.py new file mode 100644 index 00000000..5e6ad861 --- /dev/null +++ b/qai_hub_models/models/fcn_resnet50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.fcn_resnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["pascal_voc"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=400, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/fcn_resnet50/model.py b/qai_hub_models/models/fcn_resnet50/model.py index 48f371ff..c19a6aaa 100644 --- a/qai_hub_models/models/fcn_resnet50/model.py +++ b/qai_hub_models/models/fcn_resnet50/model.py @@ -50,13 +50,13 @@ def forward(self, image): Returns: tensor: 1x21xHxW tensor of class logits per pixel """ - return self.model(normalize_image_torchvision(image))["out"] + return self.model(normalize_image_torchvision(image))["out"].argmax(1).byte() @staticmethod def get_input_spec( batch_size: int = 1, - height: int = 512, - width: int = 512, + height: int = 520, + width: int = 520, ) -> InputSpec: # Get the input specification ordered (name -> (shape, type)) pairs for this model. 
# @@ -71,7 +71,3 @@ def get_output_names() -> list[str]: @staticmethod def get_channel_last_inputs() -> list[str]: return ["image"] - - @staticmethod - def get_channel_last_outputs() -> list[str]: - return ["mask"] diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index f0bc5a58..552908f0 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,47 +36,33 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FCN-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 41745.0 - throughput: 23.954964666427117 + inference_time: 62203.0 + throughput: 16.076395029178656 estimated_peak_memory_range: - min: 28672 - max: 56326792 + min: 364544 + max: 2506344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jp3j1j6mg + total_layers: 88 + job_id: jp14d14kp job_status: Passed torchscript_onnx_qnn: - inference_time: 42083.0 - throughput: 23.76256445595609 + inference_time: 62537.0 + throughput: 15.99053360410637 estimated_peak_memory_range: - min: 3252224 - max: 19895096 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 127 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 127 - job_id: jgdxqx26p - job_status: Passed - torchscript_onnx: - inference_time: 43791.0 - throughput: 22.835742504167523 - estimated_peak_memory_range: - min: 51904512 - max: 54242080 + min: 2822144 + max: 28017640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,60 +70,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp8qmqxqp - job_status: Passed - reference_device_info: - name: 
Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:20:12Z' - - torchscript_onnx_tflite: - inference_time: 41706.0 - throughput: 23.977365367093462 - estimated_peak_memory_range: - min: 22102016 - max: 24381112 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 86 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 86 - job_id: jgo2n281p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 42347.0 - throughput: 23.614423689989845 - estimated_peak_memory_range: - min: 3219456 - max: 20171336 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 127 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 127 - job_id: j57yly9n5 + job_id: jpy142zrp job_status: Passed torchscript_onnx: - inference_time: 43359.0 - throughput: 23.06326252911737 + inference_time: 58725.0 + throughput: 17.02852277564921 estimated_peak_memory_range: - min: 52490240 - max: 55058616 + min: 131072 + max: 83013304 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 129 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 129 - job_id: jgkeqe4vg + total_layers: 130 + job_id: jgjv0yd7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,51 +94,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:20:13Z' + timestamp: '2024-11-09T23:06:06Z' - torchscript_onnx_tflite: - inference_time: 36193.0 - throughput: 27.629652142679525 + inference_time: 47988.0 + throughput: 20.8385429690756 estimated_peak_memory_range: - min: 22081536 - max: 190979856 + min: 192512 + max: 302209504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jpv6r67z5 + total_layers: 88 + job_id: jgdxr4xkp job_status: Passed torchscript_onnx_qnn: - 
inference_time: 36762.0 - throughput: 27.202002067352158 + inference_time: 47738.0 + throughput: 20.947672713561523 estimated_peak_memory_range: - min: 1753088 - max: 76843664 + min: 0 + max: 154259808 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jp4ldl325 + total_layers: 129 + job_id: jp0z19425 job_status: Passed torchscript_onnx: - inference_time: 39269.0 - throughput: 25.465379816139958 + inference_time: 46355.0 + throughput: 21.5726458850178 estimated_peak_memory_range: min: 0 - max: 172540640 + max: 318538160 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 129 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 129 - job_id: j5q6r6yep + total_layers: 130 + job_id: jpedrxo75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,51 +147,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:20:14Z' + timestamp: '2024-11-09T23:06:07Z' - torchscript_onnx_tflite: - inference_time: 29964.0 - throughput: 33.37338139100254 + inference_time: 47855.0 + throughput: 20.896458050360465 estimated_peak_memory_range: - min: 20889600 - max: 123623360 + min: 282624 + max: 213966704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jp14w46np + total_layers: 88 + job_id: j57yjnyq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 30167.0 - throughput: 33.14880498558027 + inference_time: 39070.0 + throughput: 25.59508574353724 estimated_peak_memory_range: - min: 3170304 - max: 74870592 + min: 3256320 + max: 154621280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jp0z2zj05 + total_layers: 129 + job_id: jp8q3r2zp job_status: 
Passed torchscript_onnx: - inference_time: 30534.0 - throughput: 32.750376629331235 + inference_time: 45793.0 + throughput: 21.837398729063395 estimated_peak_memory_range: - min: 34435072 - max: 137187184 + min: 2252800 + max: 232350320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 129 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 129 - job_id: j56yzy7np + total_layers: 130 + job_id: jgz3xy2z5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,36 +200,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:20:17Z' + timestamp: '2024-11-09T23:06:08Z' - torchscript_onnx_tflite: - inference_time: 41216.0 - throughput: 24.262422360248447 + inference_time: 61841.0 + throughput: 16.170501770669944 estimated_peak_memory_range: - min: 22085632 - max: 23898968 + min: 524288 + max: 2402392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jgjv2vq1g + total_layers: 88 + job_id: jp4lx4lq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 38703.0 - throughput: 25.837790352169083 + inference_time: 58931.0 + throughput: 16.96899764130933 estimated_peak_memory_range: - min: 3272704 - max: 4459824 + min: 3403776 + max: 4799184 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jpxk6kx85 + total_layers: 129 + job_id: jgkel0vyg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,36 +238,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:20:04Z' + timestamp: '2024-11-09T23:05:58Z' - torchscript_onnx_tflite: - inference_time: 41526.0 - throughput: 24.08129846361316 + inference_time: 62177.0 + throughput: 16.083117551506184 estimated_peak_memory_range: - min: 
22048768 - max: 23791888 + min: 380928 + max: 2737192 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: j56yzjdvp + total_layers: 88 + job_id: jpxk7rkj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 39268.0 - throughput: 25.46602831822349 + inference_time: 59156.0 + throughput: 16.90445601460545 estimated_peak_memory_range: - min: 5201920 - max: 6866728 + min: 3477504 + max: 4933256 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jpv6ro975 + total_layers: 129 + job_id: jglv084e5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,36 +276,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:51:42Z' + timestamp: '2024-11-09T23:06:01Z' - torchscript_onnx_tflite: - inference_time: 41827.0 - throughput: 23.90800200827217 + inference_time: 61826.0 + throughput: 16.174424999191277 estimated_peak_memory_range: - min: 22122496 - max: 24306376 + min: 393216 + max: 2322504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jp3j13wxg + total_layers: 88 + job_id: j5mnwknyp job_status: Passed torchscript_onnx_qnn: - inference_time: 39876.0 - throughput: 25.07774099709098 + inference_time: 59171.0 + throughput: 16.900170691723986 estimated_peak_memory_range: - min: 3321856 - max: 4705096 + min: 3440640 + max: 4859168 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jgjv2mw7g + total_layers: 129 + job_id: j56y3m2vp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,36 +314,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - 
timestamp: '2024-10-30T01:51:43Z' + timestamp: '2024-11-09T23:06:02Z' - torchscript_onnx_tflite: - inference_time: 41635.0 - throughput: 24.018253872943436 + inference_time: 63410.0 + throughput: 15.770383220312253 estimated_peak_memory_range: - min: 22097920 - max: 24196792 + min: 290816 + max: 2428608 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jgo2n044p + total_layers: 88 + job_id: jgn69q0v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 39189.0 - throughput: 25.517364566587563 + inference_time: 59302.0 + throughput: 16.86283767832451 estimated_peak_memory_range: - min: 3297280 - max: 4658408 + min: 3366912 + max: 4985312 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jpedw1l75 + total_layers: 129 + job_id: jp3j47nxg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,36 +352,74 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:51:44Z' + timestamp: '2024-11-09T23:06:03Z' - torchscript_onnx_tflite: - inference_time: 68436.0 - throughput: 14.6121924133497 + inference_time: 101220.0 + throughput: 9.879470460383324 estimated_peak_memory_range: - min: 0 - max: 98627872 + min: 430080 + max: 69705040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 88 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 88 + job_id: jprv4d6vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 96956.0 + throughput: 10.313956846404555 + estimated_peak_memory_range: + min: 204800 + max: 5870576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 129 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 129 + job_id: jgo21wz4p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto 
+ os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:06:04Z' + - torchscript_onnx_tflite: + inference_time: 91681.0 + throughput: 10.907385390648008 + estimated_peak_memory_range: + min: 413696 + max: 133116432 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 86 + layers_on_npu: 88 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jg9lyldmg + total_layers: 88 + job_id: jp2k7dxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 66178.0 - throughput: 15.110761884614222 + inference_time: 91903.0 + throughput: 10.881037615747038 estimated_peak_memory_range: - min: 3244032 - max: 43020624 + min: 3260416 + max: 58862912 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: jpy1j1m0p + total_layers: 129 + job_id: jpv61mq75 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,36 +428,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:20:10Z' + timestamp: '2024-11-09T23:06:05Z' - torchscript_onnx_qnn: - inference_time: 39067.0 - throughput: 25.59705121969949 + inference_time: 57375.0 + throughput: 17.429193899782135 estimated_peak_memory_range: - min: 3153920 - max: 3153920 + min: 3256320 + max: 3256320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 127 + layers_on_npu: 129 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 127 - job_id: j5mn6n87p + total_layers: 129 + job_id: j5q67107p job_status: Passed torchscript_onnx: - inference_time: 42445.0 - throughput: 23.559901048415597 + inference_time: 57745.0 + throughput: 17.317516668109793 estimated_peak_memory_range: - min: 69386240 - max: 69386240 + min: 70905856 + max: 70905856 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 129 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 129 - job_id: 
jglv2vx25 + total_layers: 130 + job_id: j5wedzwz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:20:15Z' + timestamp: '2024-11-09T23:06:09Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/README.md b/qai_hub_models/models/fcn_resnet50_quantized/README.md index 94047c54..2f275166 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/README.md +++ b/qai_hub_models/models/fcn_resnet50_quantized/README.md @@ -5,8 +5,7 @@ FCN_ResNet50 is a quantized machine learning model that can segment images from the COCO dataset. It uses ResNet50 as a backbone. -This is based on the implementation of FCN-ResNet50-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/fcn_resnet50_quantized). diff --git a/qai_hub_models/models/fcn_resnet50_quantized/conftest.py b/qai_hub_models/models/fcn_resnet50_quantized/conftest.py index 8fd7c424..acd2b713 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/conftest.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.fcn_resnet50_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. 
@@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py b/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py new file mode 100644 index 00000000..fc2aad39 --- /dev/null +++ b/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.fcn_resnet50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["pascal_voc"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=400, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + 
args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 26184d96..109ee493 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.fcn_resnet50_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ can_access_qualcomm_ai_hub, export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. 
Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. @@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "fcn_resnet50_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. 
+ onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "pascal_voc", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. 
Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/model.py b/qai_hub_models/models/fcn_resnet50_quantized/model.py index 19252806..b18e7af9 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/model.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/model.py @@ -4,83 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, -) - -# isort: on - -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - from qai_hub_models.models.fcn_resnet50.model import FCN_ResNet50 -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.quantization_aimet import ( - constrain_quantized_inputs_to_image_range, - tie_observers, -) +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 -DEFAULT_ENCODINGS = "fcn_resnet50_quantized_encodings.json" - - -class FCN_ResNet50Quantizable(AIMETQuantizableMixin, FCN_ResNet50): - """ - FCN_ResNet50 with post train quantization support. - - Supports only 8 bit weights and activations - """ - - def __init__( - self, - model: QuantizationSimModel, - ) -> None: - FCN_ResNet50.__init__(self, model.model) - AIMETQuantizableMixin.__init__(self, model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - ) -> FCN_ResNet50Quantizable: - # Load Model - fp16_model = FCN_ResNet50.from_pretrained() - input_shape = cls.get_input_spec()["image"][0] - - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - tie_observers(sim) - constrain_quantized_inputs_to_image_range(sim) - - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - - final_model 
= cls(sim) - return final_model - def forward(self, image: torch.Tensor): - """ - Run FCN_ResNet50Quantizable on `image`, and produce a segmentation mask. - See FCN_ResNet50 model for details. - """ - return self.model(image) +class FCN_ResNet50Quantizable(HubQuantizableMixin, FCN_ResNet50): + pass diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index b4883aed..14b2457d 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -20,6 +20,8 @@ aggregated: - Snapdragon X Plus 8-Core CRD - QCS6490 (Proxy) - RB3 Gen 2 (Proxy) + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -36,6 +38,7 @@ aggregated: - Snapdragon® X Elite - Snapdragon® X Plus 8-Core - QCS6490 Proxy + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -46,49 +49,34 @@ models: - name: FCN-ResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 12951.0 - throughput: 77.21411474017451 + inference_time: 15313.0 + throughput: 65.30399007379351 estimated_peak_memory_range: - min: 5541888 - max: 8006296 + min: 339968 + max: 2191608 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jprv2vokg + total_layers: 90 + job_id: jgdxryekp job_status: Passed torchscript_onnx_qnn: - inference_time: 14750.0 - throughput: 67.79661016949153 + inference_time: 17154.0 + throughput: 58.295441296490615 estimated_peak_memory_range: - min: 28672 - max: 182334432 + min: 36864 + max: 16101904 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jgjv2vw1g - job_status: Passed - torchscript_onnx: - inference_time: 22062.0 - throughput: 45.32680627322999 - estimated_peak_memory_range: - 
min: 61440 - max: 44249136 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 144 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 144 - job_id: jprv2vnkg + total_layers: 130 + job_id: jgkelwqyg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -97,104 +85,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:19:24Z' + timestamp: '2024-11-09T23:49:31Z' - torchscript_onnx_tflite: - inference_time: 12998.0 - throughput: 76.93491306354824 + inference_time: 11131.0 + throughput: 89.8391878537418 estimated_peak_memory_range: - min: 5521408 - max: 320508248 + min: 311296 + max: 111190576 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jp2k9k46p + total_layers: 90 + job_id: j57yj10q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14786.0 - throughput: 67.63154335181929 + inference_time: 12736.0 + throughput: 78.51758793969849 estimated_peak_memory_range: - min: 1155072 - max: 8816376 + min: 860160 + max: 45156368 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jpedwdl85 - job_status: Passed - torchscript_onnx: - inference_time: 22081.0 - throughput: 45.28780399438431 - estimated_peak_memory_range: - min: 61440 - max: 43815440 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 144 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 144 - job_id: jp2k9kv6p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:19:25Z' - - torchscript_onnx_tflite: - inference_time: 9200.0 - throughput: 108.69565217391305 - estimated_peak_memory_range: - min: 40960 - max: 94342768 - 
primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 89 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 89 - job_id: jpy1j1q0p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 14582.0 - throughput: 68.57769853243725 - estimated_peak_memory_range: - min: 802816 - max: 35227648 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: jgz3j3445 - job_status: Passed - torchscript_onnx: - inference_time: 16683.0 - throughput: 59.94125756758377 - estimated_peak_memory_range: - min: 16384 - max: 183336976 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 144 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 144 - job_id: jpy1j170p + total_layers: 130 + job_id: j5q67xr7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -203,51 +123,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:19:26Z' + timestamp: '2024-11-09T23:49:34Z' - torchscript_onnx_tflite: - inference_time: 7751.0 - throughput: 129.01561088891756 + inference_time: 9335.0 + throughput: 107.12372790573112 estimated_peak_memory_range: - min: 5517312 - max: 52126080 + min: 303104 + max: 57449120 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgo2n241p + total_layers: 90 + job_id: jp4lx6kq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10813.0 - throughput: 92.48127254231018 - estimated_peak_memory_range: - min: 847872 - max: 35489776 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: j5mn6nd7p - job_status: Passed - torchscript_onnx: - inference_time: 14831.0 - throughput: 67.42633672712562 + inference_time: 12821.0 + throughput: 77.99703611262773 
estimated_peak_memory_range: - min: 1892352 - max: 103107792 + min: 823296 + max: 42015040 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 144 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jp8qmq1qp + total_layers: 130 + job_id: jglv092e5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -256,21 +161,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:19:28Z' + timestamp: '2024-11-09T23:49:36Z' - torchscript_onnx_qnn: - inference_time: 111733.0 - throughput: 8.949907368458737 + inference_time: 140471.0 + throughput: 7.118907105381182 estimated_peak_memory_range: - min: 1052672 - max: 9346768 + min: 1806336 + max: 10079120 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: j5we3e145 + total_layers: 130 + job_id: j56y39zvp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -279,44 +184,59 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:19:13Z' - - reference_device_info: + timestamp: '2024-11-09T23:49:13Z' + - torchscript_onnx_tflite: + inference_time: 1390981.0 + throughput: 0.718917080822815 + estimated_peak_memory_range: + min: 90566656 + max: 165677488 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 67 + layers_on_gpu: 11 + layers_on_cpu: 12 + total_layers: 90 + job_id: j5mnw1qyp + job_status: Passed + reference_device_info: name: RB5 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:19:01Z' + timestamp: '2024-11-09T23:48:51Z' - torchscript_onnx_tflite: - inference_time: 12950.0 - throughput: 77.22007722007721 + inference_time: 15309.0 + throughput: 65.32105297537396 estimated_peak_memory_range: - min: 5566464 - max: 6953000 + min: 303104 + 
max: 346058144 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgkeqe9vg + total_layers: 90 + job_id: jgn69dmv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 13176.0 - throughput: 75.89556769884639 + inference_time: 14137.0 + throughput: 70.73636556553724 estimated_peak_memory_range: - min: 835584 - max: 2112104 + min: 897024 + max: 2159856 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jg9lylxmg + total_layers: 130 + job_id: jp3j4l1xg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -325,36 +245,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:19:14Z' + timestamp: '2024-11-09T23:49:16Z' - torchscript_onnx_tflite: - inference_time: 12949.0 - throughput: 77.22604062089736 + inference_time: 15238.0 + throughput: 65.6254101588135 estimated_peak_memory_range: - min: 5529600 - max: 7538400 + min: 299008 + max: 1858536 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jpxk6j3j5 + total_layers: 90 + job_id: jprv4m2vg job_status: Passed torchscript_onnx_qnn: - inference_time: 13418.0 - throughput: 74.52675510508273 + inference_time: 14236.0 + throughput: 70.24445068839562 estimated_peak_memory_range: - min: 77824 - max: 6665696 + min: 860160 + max: 2199320 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jp2k96vxp + total_layers: 130 + job_id: jpv61yr75 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -363,36 +283,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:51:15Z' + timestamp: 
'2024-11-09T23:49:20Z' - torchscript_onnx_tflite: - inference_time: 12985.0 - throughput: 77.01193685021178 + inference_time: 15292.0 + throughput: 65.39366989275437 estimated_peak_memory_range: - min: 5545984 - max: 14974296 + min: 372736 + max: 2273384 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: j5mn62oyp + total_layers: 90 + job_id: jp2k7q9xp job_status: Passed torchscript_onnx_qnn: - inference_time: 13258.0 - throughput: 75.4261577915221 + inference_time: 14211.0 + throughput: 70.36802476954472 estimated_peak_memory_range: - min: 811008 - max: 2674536 + min: 901120 + max: 2583144 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jpy1jw7rp + total_layers: 130 + job_id: jgjv0627g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -401,36 +321,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:51:16Z' + timestamp: '2024-11-09T23:49:23Z' - torchscript_onnx_tflite: - inference_time: 12976.0 - throughput: 77.06535141800246 + inference_time: 15275.0 + throughput: 65.46644844517185 estimated_peak_memory_range: - min: 5570560 - max: 7440032 + min: 319488 + max: 2298544 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jprv2qnvg + total_layers: 90 + job_id: jpy14kjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 13257.0 - throughput: 75.431847325941 + inference_time: 14194.0 + throughput: 70.45230379033394 estimated_peak_memory_range: - min: 876544 - max: 2575312 + min: 888832 + max: 2234208 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jp8qm94zp + 
total_layers: 130 + job_id: jpedr0w75 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -439,36 +359,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:51:18Z' + timestamp: '2024-11-09T23:49:25Z' - torchscript_onnx_tflite: - inference_time: 19028.0 - throughput: 52.55413075467732 + inference_time: 22596.0 + throughput: 44.255620463798905 estimated_peak_memory_range: - min: 5558272 - max: 50490944 + min: 303104 + max: 55797408 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgn6my7v5 + total_layers: 90 + job_id: jp0z18225 job_status: Passed torchscript_onnx_qnn: - inference_time: 18847.0 - throughput: 53.05884225606197 + inference_time: 21575.0 + throughput: 46.349942062572424 estimated_peak_memory_range: - min: 802816 - max: 6891840 + min: 913408 + max: 6604256 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jp0z2qv25 + total_layers: 130 + job_id: jgz3xqjz5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -477,36 +397,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:51:17Z' + timestamp: '2024-11-09T23:49:27Z' - torchscript_onnx_tflite: - inference_time: 15248.0 - throughput: 65.58237145855195 + inference_time: 17951.0 + throughput: 55.70720294134031 estimated_peak_memory_range: - min: 5541888 - max: 102348960 + min: 720896 + max: 111018400 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 90 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jp3j1jwmg + total_layers: 90 + job_id: jp8q3dmzp job_status: Passed torchscript_onnx_qnn: - inference_time: 17025.0 - throughput: 58.737151248164466 + inference_time: 19842.0 + throughput: 50.398145348251184 
estimated_peak_memory_range: - min: 2953216 - max: 38842032 + min: 827392 + max: 44477360 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 128 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 128 - job_id: jpxk6kd85 + total_layers: 130 + job_id: j5wed03z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -515,36 +435,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:19:20Z' + timestamp: '2024-11-09T23:49:29Z' - torchscript_onnx_qnn: - inference_time: 13740.0 - throughput: 72.7802037845706 - estimated_peak_memory_range: - min: 811008 - max: 811008 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: jp14w4vnp - job_status: Passed - torchscript_onnx: - inference_time: 21670.0 - throughput: 46.14674665436087 + inference_time: 14608.0 + throughput: 68.45564074479738 estimated_peak_memory_range: - min: 35426304 - max: 35426304 + min: 815104 + max: 815104 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 144 + layers_on_npu: 130 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jp0z2z605 + total_layers: 130 + job_id: jgo217n4p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -553,4 +458,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:19:27Z' + timestamp: '2024-11-09T23:49:38Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/test.py b/qai_hub_models/models/fcn_resnet50_quantized/test.py deleted file mode 100644 index d5dae110..00000000 --- a/qai_hub_models/models/fcn_resnet50_quantized/test.py +++ /dev/null @@ -1,40 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
-# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -import torch - -from qai_hub_models.models.fcn_resnet50.app import FCN_ResNet50App -from qai_hub_models.models.fcn_resnet50.demo import INPUT_IMAGE_ADDRESS -from qai_hub_models.models.fcn_resnet50_quantized.demo import main as demo_main -from qai_hub_models.models.fcn_resnet50_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - FCN_ResNet50Quantizable, -) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - load_numpy, -) -from qai_hub_models.utils.testing import skip_clone_repo_check - -OUTPUT_IMAGE_MASK = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "fcn_resnet50_output_mask.npy" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. - torch.manual_seed(0) - image = load_image(INPUT_IMAGE_ADDRESS) - app = FCN_ResNet50App(FCN_ResNet50Quantizable.from_pretrained()) - output_mask = app.predict(image, True) - output_mask_gt = load_numpy(OUTPUT_IMAGE_MASK) - assert (output_mask == output_mask_gt).mean() > 0.95 - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/README.md b/qai_hub_models/models/ffnet_122ns_lowres/README.md index 2b69b08a..bca0a9e8 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/README.md +++ b/qai_hub_models/models/ffnet_122ns_lowres/README.md @@ -5,8 +5,7 @@ FFNet-122NS-LowRes is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-122NS-LowRes found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_122ns_lowres). diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 9f279b2c..db5b8f3f 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 7358.0 - throughput: 135.9064963305246 + inference_time: 7375.0 + throughput: 135.59322033898306 estimated_peak_memory_range: - min: 36864 - max: 2429336 + min: 651264 + max: 3822320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jgn6m6xq5 + job_id: jp14d1znp job_status: Passed torchscript_onnx_qnn: - inference_time: 7282.0 - throughput: 137.32491073880803 + inference_time: 7164.0 + throughput: 139.58682300390842 estimated_peak_memory_range: min: 6316032 - max: 33954960 + max: 38035960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j56yzyryp + job_id: jpy14230p job_status: Passed torchscript_onnx: - inference_time: 7497.0 - throughput: 133.38668800853674 + inference_time: 7300.0 + throughput: 136.986301369863 estimated_peak_memory_range: - min: 49152 - max: 965601968 + min: 110592 + max: 61929960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jgdxqx8zp + job_id: jgjv0yv1g job_status: Passed reference_device_info: name: 
Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:17:37Z' + timestamp: '2024-11-09T23:05:14Z' - torchscript_onnx_tflite: - inference_time: 7325.0 - throughput: 136.51877133105802 + inference_time: 5038.0 + throughput: 198.4914648670107 estimated_peak_memory_range: - min: 643072 - max: 3215248 + min: 536576 + max: 72098448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jprv2v97g + job_id: jgdxr416p job_status: Passed torchscript_onnx_qnn: - inference_time: 7202.0 - throughput: 138.8503193557345 + inference_time: 4973.0 + throughput: 201.08586366378444 estimated_peak_memory_range: - min: 6307840 - max: 35917448 + min: 6324224 + max: 31230160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jp3j1jxng + job_id: jp0z19z05 job_status: Passed torchscript_onnx: - inference_time: 7343.0 - throughput: 136.1841209314994 + inference_time: 5363.0 + throughput: 186.46280067126608 estimated_peak_memory_range: - min: 12288 - max: 62580832 + min: 2674688 + max: 97850336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j5we3e845 + job_id: jpedrxd85 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:17:38Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:05:15Z' - torchscript_onnx_tflite: - inference_time: 7189.0 - throughput: 139.10140492418972 + inference_time: 4902.0 + throughput: 203.9983680130559 estimated_peak_memory_range: - min: 663552 - max: 70827264 + min: 122880 + max: 30176816 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jp2k9kjqp + job_id: j57yjnrn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5452.0 - throughput: 183.41892883345562 + inference_time: 4803.0 + throughput: 208.20320632937748 estimated_peak_memory_range: - min: 6307840 - max: 29920080 + min: 6303744 + max: 28210048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,37 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgo2n2okp - job_status: Passed - torchscript_onnx: - inference_time: 6280.0 - throughput: 159.23566878980893 - estimated_peak_memory_range: - min: 2060288 - max: 97471248 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 350 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 350 - job_id: jg9lylkmg + job_id: jp8q3rqqp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:17:39Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:05:16Z' - torchscript_onnx_tflite: - inference_time: 4900.0 - throughput: 204.08163265306123 + inference_time: 7277.0 + throughput: 137.4192661811186 estimated_peak_memory_range: - min: 53248 - max: 30246944 + min: 643072 + max: 2788640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +199,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jglv2vom5 + job_id: jp4lx4r25 job_status: Passed torchscript_onnx_qnn: - inference_time: 4785.0 - throughput: 208.9864158829676 + inference_time: 6749.0 + throughput: 148.1700992739665 estimated_peak_memory_range: - min: 6303744 - max: 27877984 + min: 6336512 + max: 7441264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +214,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 348 - job_id: jp14w477p - job_status: Passed - torchscript_onnx: - inference_time: 5279.0 - throughput: 189.42981625307823 - estimated_peak_memory_range: - min: 7577600 - max: 52608848 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 350 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 350 - job_id: jgdxqx86p + job_id: jgkel0evg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:17:41Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:05:07Z' - torchscript_onnx_tflite: - inference_time: 7272.0 - throughput: 137.5137513751375 + inference_time: 7265.0 + throughput: 137.64624913971093 estimated_peak_memory_range: - min: 720896 - max: 2853760 + min: 1191936 + max: 3407184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +237,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jpy1j1nlp + job_id: jpxk7ro85 job_status: Passed torchscript_onnx_qnn: - inference_time: 6723.0 - throughput: 148.74312063067083 + inference_time: 6717.0 + throughput: 148.87598630340926 estimated_peak_memory_range: - min: 6365184 - max: 7563824 + min: 6336512 + max: 8289944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +252,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jpv6r6er5 + job_id: jglv08v25 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:17:29Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:05:09Z' - torchscript_onnx_tflite: - inference_time: 7352.0 - throughput: 136.01741022850925 + inference_time: 7383.0 + throughput: 
135.4462955438169 estimated_peak_memory_range: - min: 651264 - max: 3106952 + min: 655360 + max: 2867456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +275,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jpedw1885 + job_id: j5mnwkx7p job_status: Passed torchscript_onnx_qnn: - inference_time: 6711.0 - throughput: 149.00908955446283 + inference_time: 6736.0 + throughput: 148.45605700712588 estimated_peak_memory_range: - min: 6344704 - max: 8042824 + min: 6340608 + max: 7721224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +290,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jg9ly1kmg + job_id: j56y3mynp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:49:53Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:05:10Z' - torchscript_onnx_tflite: - inference_time: 7326.0 - throughput: 136.5001365001365 + inference_time: 7418.0 + throughput: 134.80722566729577 estimated_peak_memory_range: - min: 1216512 - max: 3267464 + min: 729088 + max: 3006880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +313,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jgz3j9845 + job_id: jgn69qvj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6706.0 - throughput: 149.1201908738443 + inference_time: 6780.0 + throughput: 147.49262536873155 estimated_peak_memory_range: - min: 6340608 - max: 7478600 + min: 7376896 + max: 8896400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +328,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jp14wl7np + job_id: jp3j47jmg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: 
'2024-10-30T01:49:54Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:05:11Z' - torchscript_onnx_tflite: - inference_time: 7292.0 - throughput: 137.13658804168952 + inference_time: 12576.0 + throughput: 79.51653944020356 estimated_peak_memory_range: - min: 663552 - max: 3379704 + min: 638976 + max: 25882864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +351,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j5we3v845 + job_id: jprv4d3kg job_status: Passed torchscript_onnx_qnn: - inference_time: 6834.0 - throughput: 146.3271875914545 + inference_time: 10955.0 + throughput: 91.28251939753537 estimated_peak_memory_range: - min: 6377472 - max: 7706728 + min: 65536 + max: 5725744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +366,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgdxq986p + job_id: jgo21w21p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:49:56Z' + chipset: SA8295P + timestamp: '2024-11-09T23:05:12Z' - torchscript_onnx_tflite: - inference_time: 10745.0 - throughput: 93.06654257794322 + inference_time: 10771.0 + throughput: 92.8418902608857 estimated_peak_memory_range: - min: 659456 - max: 64212064 + min: 638976 + max: 63216320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +389,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j5q6r6wop + job_id: jp2k7dy6p job_status: Passed torchscript_onnx_qnn: - inference_time: 11005.0 - throughput: 90.86778736937755 + inference_time: 11094.0 + throughput: 90.13881377321074 estimated_peak_memory_range: - min: 6303744 - max: 29328000 + min: 0 + max: 21195104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +404,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jg9lylk8g + 
job_id: jpv61m6z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +413,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:17:35Z' + timestamp: '2024-11-09T23:05:13Z' - torchscript_onnx_qnn: - inference_time: 7101.0 - throughput: 140.8252358822701 + inference_time: 7095.0 + throughput: 140.94432699083862 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -455,14 +427,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgjv2voeg + job_id: j5q6716ep job_status: Passed torchscript_onnx: - inference_time: 7668.0 - throughput: 130.41210224308816 + inference_time: 7669.0 + throughput: 130.39509714434737 estimated_peak_memory_range: - min: 60080128 - max: 60080128 + min: 61026304 + max: 61026304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +442,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jp14w47np + job_id: j5wedze45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +451,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:17:40Z' + timestamp: '2024-11-09T23:05:17Z' diff --git a/qai_hub_models/models/ffnet_40s/README.md b/qai_hub_models/models/ffnet_40s/README.md index d2b952c7..1a82f4bb 100644 --- a/qai_hub_models/models/ffnet_40s/README.md +++ b/qai_hub_models/models/ffnet_40s/README.md @@ -5,8 +5,7 @@ FFNet-40S is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-40S found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_40s). diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 19221aaa..394a84ee 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 17087.0 - throughput: 58.52402411189794 + inference_time: 16938.0 + throughput: 59.0388475616956 estimated_peak_memory_range: - min: 2158592 - max: 33794168 + min: 2539520 + max: 4719584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j57ylyv95 + job_id: jpedr6385 job_status: Passed torchscript_onnx_qnn: - inference_time: 17134.0 - throughput: 58.36348780203105 + inference_time: 17362.0 + throughput: 57.59705103098722 estimated_peak_memory_range: - min: 25206784 - max: 42477840 + min: 25231360 + max: 50366968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp8qmqwop + job_id: j5mnw9y7p job_status: Passed torchscript_onnx: inference_time: 24661.0 throughput: 40.54985604801103 estimated_peak_memory_range: - min: 25206784 - max: 490737264 + min: 25235456 + max: 56403408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgz3j3wx5 + job_id: j56y3m4np job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:16:46Z' + timestamp: '2024-11-09T23:04:22Z' - torchscript_onnx_tflite: - inference_time: 17016.0 - throughput: 58.76821814762577 + inference_time: 12305.0 + throughput: 81.26777732629013 estimated_peak_memory_range: - min: 2531328 - max: 5249752 + min: 581632 + max: 108473472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jp4ldlj15 + job_id: jgz3xzk45 job_status: Passed torchscript_onnx_qnn: - inference_time: 17612.0 - throughput: 56.77946854417443 + inference_time: 12568.0 + throughput: 79.56715467854869 estimated_peak_memory_range: - min: 26247168 - max: 52077248 + min: 25182208 + max: 62460736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgkeqerng + job_id: jgn6918j5 job_status: Passed torchscript_onnx: - inference_time: 25029.0 - throughput: 39.9536537616365 + inference_time: 18446.0 + throughput: 54.21229534858506 estimated_peak_memory_range: - min: 25198592 - max: 27769656 + min: 32542720 + max: 165110656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j5we3exm5 + job_id: jp3j470mg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:16:47Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:04:23Z' - torchscript_onnx_tflite: - inference_time: 14942.0 - throughput: 66.9254450542096 + inference_time: 11870.0 + throughput: 84.24599831508003 estimated_peak_memory_range: - min: 2523136 - max: 108121184 + min: 561152 + max: 44964080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 92 - job_id: jpxk6kel5 + job_id: j5wedyn45 job_status: Passed torchscript_onnx_qnn: - inference_time: 20777.0 - throughput: 48.13014390913029 + inference_time: 12122.0 + throughput: 82.49463784853984 estimated_peak_memory_range: - min: 25198592 - max: 60290528 + min: 34770944 + max: 67836976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j5q6r69op + job_id: jprv4xjkg job_status: Passed torchscript_onnx: - inference_time: 21372.0 - throughput: 46.790192775594235 + inference_time: 15688.0 + throughput: 63.74298827129016 estimated_peak_memory_range: - min: 28966912 - max: 158552112 + min: 19415040 + max: 72926288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jg9lyl88g + job_id: jgo21w61p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:16:48Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:04:24Z' - torchscript_onnx_tflite: - inference_time: 11826.0 - throughput: 84.5594452900389 + inference_time: 16894.0 + throughput: 59.192612761927315 estimated_peak_memory_range: - min: 1019904 - max: 45611056 + min: 2506752 + max: 5009808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jp0z2zen5 + job_id: jg9l3oemg job_status: Passed torchscript_onnx_qnn: - inference_time: 12129.0 - throughput: 82.44702778464837 + inference_time: 16325.0 + throughput: 61.25574272588055 estimated_peak_memory_range: - min: 25178112 - max: 58307376 + min: 25235456 + max: 26360600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jpedwd2v5 - job_status: Passed - torchscript_onnx: - inference_time: 17620.0 - throughput: 56.75368898978434 - estimated_peak_memory_range: - min: 30437376 - max: 84456384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: jgdxqx0zp + job_id: jp2k7on6p job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:16:51Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:04:14Z' - torchscript_onnx_tflite: - inference_time: 16946.0 - throughput: 59.01097604154373 + inference_time: 16927.0 + throughput: 59.077213918591596 estimated_peak_memory_range: - min: 2547712 - max: 4812760 + min: 2539520 + max: 4845616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j5mn6nv9p + job_id: jp14doxnp job_status: Passed torchscript_onnx_qnn: - inference_time: 16298.0 - throughput: 61.357221744999386 + inference_time: 16448.0 + throughput: 60.797665369649806 estimated_peak_memory_range: min: 25243648 - max: 26567424 + max: 26838976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jglv2v7m5 + job_id: jp0z19005 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:16:39Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:04:16Z' - torchscript_onnx_tflite: - inference_time: 17185.0 - throughput: 58.19028222286878 + inference_time: 16989.0 + 
throughput: 58.86161633998469 estimated_peak_memory_range: - min: 2539520 - max: 4548368 + min: 12288 + max: 7604896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jp8qm96qp + job_id: jgdxr6l6p job_status: Passed torchscript_onnx_qnn: - inference_time: 16399.0 - throughput: 60.97932800780536 + inference_time: 16673.0 + throughput: 59.97720866070893 estimated_peak_memory_range: - min: 25239552 - max: 26895712 + min: 25235456 + max: 26591176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jglv2zo25 + job_id: jp8q3ryqp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:49:23Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:04:17Z' - torchscript_onnx_tflite: - inference_time: 17010.0 - throughput: 58.78894767783657 + inference_time: 16923.0 + throughput: 59.0911776871713 estimated_peak_memory_range: - min: 2543616 - max: 4599144 + min: 2539520 + max: 4637368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jgkeqnovg + job_id: j57yjo3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 16415.0 - throughput: 60.91989034419738 + inference_time: 16644.0 + throughput: 60.0817111271329 estimated_peak_memory_range: - min: 25268224 - max: 27066136 + min: 25255936 + max: 26761056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j56yzjrnp + job_id: jgkel0xvg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P 
Proxy - timestamp: '2024-10-30T01:49:25Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:04:19Z' - torchscript_onnx_tflite: - inference_time: 17251.0 - throughput: 57.96765404904063 + inference_time: 31644.0 + throughput: 31.60156743774491 estimated_peak_memory_range: - min: 2486272 - max: 4575440 + min: 2543616 + max: 40453376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j5q6rkzep + job_id: jp4lxe025 job_status: Passed torchscript_onnx_qnn: - inference_time: 16392.0 - throughput: 61.00536847242557 + inference_time: 30730.0 + throughput: 32.54149040026033 estimated_peak_memory_range: - min: 25247744 - max: 26561864 + min: 25182208 + max: 31051376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp3j13xmg + job_id: j5q671qep job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:49:26Z' + chipset: SA8295P + timestamp: '2024-11-09T23:04:20Z' - torchscript_onnx_tflite: - inference_time: 27631.0 - throughput: 36.191234483008216 + inference_time: 27756.0 + throughput: 36.028246144977665 estimated_peak_memory_range: - min: 24576 - max: 94500224 + min: 2572288 + max: 98545264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jpy1j1vlp + job_id: jpxk70285 job_status: Passed torchscript_onnx_qnn: - inference_time: 28300.0 - throughput: 35.3356890459364 + inference_time: 28687.0 + throughput: 34.858995363753614 estimated_peak_memory_range: - min: 25182208 - max: 59746448 + min: 23080960 + max: 55458736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 140 - job_id: jgjv2v1eg + job_id: jglv08m25 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:16:44Z' + timestamp: '2024-11-09T23:04:21Z' - torchscript_onnx_qnn: - inference_time: 16318.0 - throughput: 61.282019855374436 + inference_time: 16414.0 + throughput: 60.92360180333861 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j56yzyvyp + job_id: jpy14800p job_status: Passed torchscript_onnx: - inference_time: 30203.0 - throughput: 33.1092937787637 + inference_time: 30117.0 + throughput: 33.203838363714844 estimated_peak_memory_range: - min: 25219072 - max: 25219072 + min: 25227264 + max: 25227264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp14w437p + job_id: jpv61mkz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:16:49Z' + timestamp: '2024-11-09T23:04:25Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/README.md b/qai_hub_models/models/ffnet_40s_quantized/README.md index 1772b22d..b7d0f17e 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/README.md +++ b/qai_hub_models/models/ffnet_40s_quantized/README.md @@ -5,8 +5,7 @@ FFNet-40S-Quantized is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-40S-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_40s_quantized). diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index 62e74f9c..51b1d9fb 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -206,9 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index 0f126017..1ac5b694 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -49,49 +49,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4161.0 - throughput: 240.3268445085316 - estimated_peak_memory_range: - min: 638976 - max: 2241248 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 99 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 99 - job_id: jgo2n21kp - job_status: Passed - torchscript_onnx: - inference_time: 9025.0 - throughput: 110.80332409972299 - estimated_peak_memory_range: - min: 110592 - max: 12489856 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 168 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 168 - job_id: jgo2n2ekp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:15:59Z' - - torchscript_onnx_tflite: - inference_time: 4136.0 - throughput: 241.77949709864603 + inference_time: 4183.0 + 
throughput: 239.0628735357399 estimated_peak_memory_range: min: 651264 - max: 2876776 + max: 2761720 primary_compute_unit: NPU precision: int8 layer_info: @@ -99,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jpv6r61r5 + job_id: j56y3ojnp job_status: Passed torchscript_onnx: - inference_time: 9019.0 - throughput: 110.87703736556159 + inference_time: 8994.0 + throughput: 111.185234600845 estimated_peak_memory_range: - min: 57344 - max: 12228296 + min: 491520 + max: 12155576 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,7 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jpv6r6zr5 + job_id: j5q674oep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -123,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:16:00Z' + timestamp: '2024-11-09T23:03:35Z' - torchscript_onnx_tflite: - inference_time: 2908.0 - throughput: 343.878954607978 + inference_time: 2919.0 + throughput: 342.58307639602606 estimated_peak_memory_range: - min: 376832 - max: 66236128 + min: 638976 + max: 70312192 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgjv2v0eg + job_id: jp3j4o3mg job_status: Passed torchscript_onnx: - inference_time: 6337.0 - throughput: 157.80337699226763 + inference_time: 6253.0 + throughput: 159.92323684631376 estimated_peak_memory_range: - min: 7577600 - max: 115054432 + min: 7655424 + max: 117328208 primary_compute_unit: NPU precision: int8 layer_info: @@ -152,7 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jgjv2vkeg + job_id: jglv0wr25 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -161,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:16:01Z' + timestamp: '2024-11-09T23:03:36Z' - 
torchscript_onnx_tflite: - inference_time: 2924.0 - throughput: 341.9972640218878 + inference_time: 2975.0 + throughput: 336.1344537815126 estimated_peak_memory_range: - min: 651264 - max: 32971552 + min: 32768 + max: 32167424 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jp4ldlx15 + job_id: jgo21d01p job_status: Passed torchscript_onnx: - inference_time: 6105.0 - throughput: 163.8001638001638 + inference_time: 6106.0 + throughput: 163.77333770062233 estimated_peak_memory_range: - min: 7446528 - max: 61103712 + min: 7630848 + max: 61265152 primary_compute_unit: NPU precision: int8 layer_info: @@ -190,7 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jgz3j3vx5 + job_id: j56y3olnp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -199,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:16:03Z' + timestamp: '2024-11-09T23:03:37Z' - torchscript_onnx_tflite: - inference_time: 27146.0 - throughput: 36.83783982907242 + inference_time: 26424.0 + throughput: 37.84438389343021 estimated_peak_memory_range: - min: 700416 - max: 42036288 + min: 12288 + max: 40318288 primary_compute_unit: NPU precision: int8 layer_info: @@ -213,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jpedwdrv5 + job_id: jpv612oz5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:15:36Z' + timestamp: '2024-11-09T23:03:15Z' - torchscript_onnx_tflite: - inference_time: 189511.0 - throughput: 5.276738553434893 + inference_time: 188962.0 + throughput: 5.292069304939617 estimated_peak_memory_range: - min: 942080 - max: 3247272 + min: 1007616 + max: 9811312 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +198,7 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgz3j3xx5 + job_id: jgjv03m1g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:15:37Z' + timestamp: '2024-11-09T23:03:16Z' - torchscript_onnx_tflite: - inference_time: 4064.0 - throughput: 246.06299212598427 + inference_time: 4076.0 + throughput: 245.3385672227674 estimated_peak_memory_range: - min: 36864 - max: 6492832 + min: 20480 + max: 145505720 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j5we3edm5 + job_id: jpedr6185 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -268,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:15:38Z' + timestamp: '2024-11-09T23:03:17Z' - torchscript_onnx_tflite: - inference_time: 4071.0 - throughput: 245.63989191844755 + inference_time: 4177.0 + throughput: 239.40627244433804 estimated_peak_memory_range: - min: 651264 - max: 35975592 + min: 630784 + max: 2923184 primary_compute_unit: NPU precision: int8 layer_info: @@ -282,7 +244,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgdxq906p + job_id: jgz3xz945 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -291,13 +253,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:48:54Z' + timestamp: '2024-11-09T23:03:18Z' - torchscript_onnx_tflite: - inference_time: 4056.0 - throughput: 246.5483234714004 + inference_time: 4160.0 + throughput: 240.3846153846154 estimated_peak_memory_range: - min: 643072 - max: 9374312 + min: 16384 + max: 183158040 primary_compute_unit: NPU precision: int8 layer_info: @@ -305,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j57ylw6n5 + job_id: j5wedyv45 
job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -314,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:48:55Z' + timestamp: '2024-11-09T23:03:19Z' - torchscript_onnx_tflite: - inference_time: 4059.0 - throughput: 246.3661000246366 + inference_time: 4072.0 + throughput: 245.5795677799607 estimated_peak_memory_range: - min: 630784 - max: 2971752 + min: 638976 + max: 3273920 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jpxk6jm85 + job_id: jg9l3o1mg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -337,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:48:57Z' + timestamp: '2024-11-09T23:03:20Z' - torchscript_onnx_tflite: - inference_time: 7965.0 - throughput: 125.54927809165098 + inference_time: 8068.0 + throughput: 123.94645513138325 estimated_peak_memory_range: - min: 2789376 - max: 35037984 + min: 663552 + max: 33148320 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,7 +313,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jp4ldo825 + job_id: jp14dolnp job_status: Passed reference_device_info: name: SA8295P ADP @@ -360,13 +322,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:48:56Z' + timestamp: '2024-11-09T23:03:21Z' - torchscript_onnx_tflite: - inference_time: 5111.0 - throughput: 195.65642731363727 + inference_time: 5159.0 + throughput: 193.83601473153712 estimated_peak_memory_range: - min: 0 - max: 69418832 + min: 311296 + max: 67313184 primary_compute_unit: NPU precision: int8 layer_info: @@ -374,7 +336,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j57ylyj95 + job_id: jgdxr696p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -383,13 +345,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:15:42Z' + timestamp: '2024-11-09T23:03:22Z' - torchscript_onnx: - inference_time: 9109.0 - throughput: 109.78153474585575 + inference_time: 9128.0 + throughput: 109.55302366345312 estimated_peak_memory_range: - min: 10825728 - max: 10825728 + min: 10727424 + max: 10727424 primary_compute_unit: NPU precision: int8 layer_info: @@ -397,7 +359,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jpedwd4v5 + job_id: jp3j4o2mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -406,4 +368,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:16:02Z' + timestamp: '2024-11-09T23:03:39Z' diff --git a/qai_hub_models/models/ffnet_54s/README.md b/qai_hub_models/models/ffnet_54s/README.md index a652acc3..e425f713 100644 --- a/qai_hub_models/models/ffnet_54s/README.md +++ b/qai_hub_models/models/ffnet_54s/README.md @@ -5,8 +5,7 @@ FFNet-54S is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-54S found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_54s). 
diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 1d1bfb42..ae0297de 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 20095.0 - throughput: 49.76362279173924 + inference_time: 19705.0 + throughput: 50.748540979446844 estimated_peak_memory_range: - min: 2129920 - max: 4040728 + min: 2539520 + max: 4561936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jgo2n23qp + job_id: j56y3o6yp job_status: Passed torchscript_onnx_qnn: - inference_time: 20012.0 - throughput: 49.970017989206475 + inference_time: 20103.0 + throughput: 49.74381933044819 estimated_peak_memory_range: - min: 25178112 - max: 49155496 + min: 25231360 + max: 50426880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j5we3e9m5 + job_id: jp14do07p job_status: Passed torchscript_onnx: - inference_time: 28227.0 - throughput: 35.42707336946895 + inference_time: 27922.0 + throughput: 35.814053434567725 estimated_peak_memory_range: - min: 15572992 - max: 30319864 + min: 25243648 + max: 39597888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jp2k9kmqp + job_id: jgn6912j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-27T00:14:49Z' + timestamp: '2024-11-09T23:02:26Z' - torchscript_onnx_tflite: - inference_time: 19971.0 - throughput: 50.0726052776526 + inference_time: 14531.0 + throughput: 68.81838827334664 estimated_peak_memory_range: - min: 2056192 - max: 4249824 + min: 12288 + max: 117433744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jpv6r6wk5 + job_id: jp3j4okng job_status: Passed torchscript_onnx_qnn: - inference_time: 19974.0 - throughput: 50.065084609992994 + inference_time: 14486.0 + throughput: 69.0321689907497 estimated_peak_memory_range: - min: 25186304 - max: 50306096 + min: 25182208 + max: 63317920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jg9lyl48g + job_id: jgdxr6wzp job_status: Passed torchscript_onnx: - inference_time: 27864.0 - throughput: 35.88860178007465 + inference_time: 21100.0 + throughput: 47.39336492890995 estimated_peak_memory_range: - min: 25202688 - max: 39771304 + min: 30859264 + max: 175096896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jpy1j1dlp + job_id: jprv4xkkg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:14:51Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:02:27Z' - torchscript_onnx_tflite: - inference_time: 17805.0 - throughput: 56.16399887672002 + inference_time: 14005.0 + throughput: 71.40307033202427 estimated_peak_memory_range: - min: 843776 - max: 118856640 + min: 1474560 + max: 50149120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - 
job_id: jgjv2vlvg + job_id: jgo21dykp job_status: Passed torchscript_onnx_qnn: - inference_time: 17565.0 - throughput: 56.931397665812696 + inference_time: 14044.0 + throughput: 71.20478496154942 estimated_peak_memory_range: - min: 21004288 - max: 57734320 + min: 25178112 + max: 62089760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jp14w487p + job_id: j5wedyo45 job_status: Passed torchscript_onnx: - inference_time: 30952.0 - throughput: 32.30808994572241 + inference_time: 22090.0 + throughput: 45.26935264825713 estimated_peak_memory_range: - min: 507904 - max: 141237872 + min: 29736960 + max: 81612384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jp0z2zrn5 + job_id: jp2k7o86p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:14:52Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T23:02:28Z' - torchscript_onnx_tflite: - inference_time: 14169.0 - throughput: 70.57661091114404 + inference_time: 19757.0 + throughput: 50.61497190869059 estimated_peak_memory_range: - min: 794624 - max: 49550320 + min: 2543616 + max: 5365472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jgdxqxvrp + job_id: jpv6123r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14233.0 - throughput: 70.25925665706457 + inference_time: 19382.0 + throughput: 51.59426271798576 estimated_peak_memory_range: - min: 25194496 - max: 62163856 + min: 25272320 + max: 26445744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 175 - job_id: jprv2vy7g - job_status: Passed - torchscript_onnx: - inference_time: 22126.0 - throughput: 45.195697369610414 - estimated_peak_memory_range: - min: 30031872 - max: 83461600 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 177 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 177 - job_id: jgkeqeyng + job_id: jg9l3ovmg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:14:54Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T23:02:18Z' - torchscript_onnx_tflite: - inference_time: 20089.0 - throughput: 49.778485738463836 + inference_time: 19681.0 + throughput: 50.81042629947665 estimated_peak_memory_range: - min: 2539520 - max: 4714992 + min: 2453504 + max: 4548456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jpedwdvo5 + job_id: jgjv03xeg job_status: Passed torchscript_onnx_qnn: - inference_time: 19008.0 - throughput: 52.60942760942761 + inference_time: 19330.0 + throughput: 51.733057423693744 estimated_peak_memory_range: - min: 25219072 - max: 26427184 + min: 27344896 + max: 28653560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jgdxqxvzp + job_id: jgdxr6w6p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:14:42Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T23:02:20Z' - torchscript_onnx_tflite: - inference_time: 20071.0 - throughput: 49.82312789596931 + inference_time: 19990.0 + throughput: 
50.02501250625313 estimated_peak_memory_range: min: 2138112 - max: 4538544 + max: 4411744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j56yzjvnp + job_id: jpedr69v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19267.0 - throughput: 51.902216224632795 + inference_time: 19296.0 + throughput: 51.824212271973465 estimated_peak_memory_range: - min: 25251840 - max: 26544184 + min: 26959872 + max: 28233784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jpv6ro4z5 + job_id: j57yjozn5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:48:15Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T23:02:22Z' - torchscript_onnx_tflite: - inference_time: 20201.0 - throughput: 49.50249987624375 + inference_time: 19839.0 + throughput: 50.405766419678415 estimated_peak_memory_range: - min: 2519040 - max: 4226200 + min: 2138112 + max: 4203144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jp3j138mg + job_id: jgz3xzex5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19186.0 - throughput: 52.12133847597206 + inference_time: 19248.0 + throughput: 51.953449709060685 estimated_peak_memory_range: - min: 25272320 - max: 26560960 + min: 25743360 + max: 27220136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jgjv2m11g + job_id: jp4lxeq25 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: 
'2024-10-30T01:48:16Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T23:02:23Z' - torchscript_onnx_tflite: - inference_time: 19783.0 - throughput: 50.54845068998635 + inference_time: 36554.0 + throughput: 27.356787218909012 estimated_peak_memory_range: - min: 49152 - max: 2740408 + min: 2555904 + max: 43451600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jgo2n0m1p + job_id: j5wedyom5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19616.0 - throughput: 50.97879282218597 + inference_time: 35465.0 + throughput: 28.196813760045114 estimated_peak_memory_range: - min: 25264128 - max: 26916248 + min: 25182208 + max: 31049024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jpedw1285 + job_id: jpxk70v85 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:48:17Z' + chipset: SA8295P + timestamp: '2024-11-09T23:02:24Z' - torchscript_onnx_tflite: - inference_time: 31987.0 - throughput: 31.262700472066776 + inference_time: 32381.0 + throughput: 30.882307526018344 estimated_peak_memory_range: - min: 2576384 - max: 105523296 + min: 2564096 + max: 103392032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jp14w488p + job_id: jg9l3ov8g job_status: Passed torchscript_onnx_qnn: - inference_time: 32588.0 - throughput: 30.686142138210386 + inference_time: 32941.0 + throughput: 30.357305485565103 estimated_peak_memory_range: - min: 25210880 - max: 58183472 + min: 25202688 + max: 58342336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 
- job_id: jgn6m6eq5 + job_id: j5mnw9r7p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:14:47Z' + timestamp: '2024-11-09T23:02:25Z' - torchscript_onnx_qnn: - inference_time: 19293.0 - throughput: 51.83227077178251 + inference_time: 19314.0 + throughput: 51.77591384487936 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j57ylyd95 + job_id: jp14do0np job_status: Passed torchscript_onnx: - inference_time: 32754.0 - throughput: 30.530622214080722 + inference_time: 32701.0 + throughput: 30.580104583957677 estimated_peak_memory_range: - min: 25223168 - max: 25223168 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jp8qmq7op + job_id: jpy148e0p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:14:53Z' + timestamp: '2024-11-09T23:02:30Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/README.md b/qai_hub_models/models/ffnet_54s_quantized/README.md index 9306773a..1e4681f9 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/README.md +++ b/qai_hub_models/models/ffnet_54s_quantized/README.md @@ -5,8 +5,7 @@ FFNet-54S-Quantized is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-54S-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_54s_quantized). diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index 87cab4ff..ec64e597 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -206,9 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 3d701f85..a7b1580c 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4791.0 - throughput: 208.7246921310791 + inference_time: 4834.0 + throughput: 206.8680182043856 estimated_peak_memory_range: - min: 667648 - max: 3107136 + min: 647168 + max: 2916736 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jpy1j1j8p + job_id: jp0z1own5 job_status: Passed torchscript_onnx: - inference_time: 11004.0 - throughput: 90.87604507451836 + inference_time: 10985.0 + throughput: 91.03322712790168 estimated_peak_memory_range: - min: 6369280 - max: 23109128 + min: 53248 + max: 16306096 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,7 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: jpy1j168p + job_id: jp2k7o2qp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -85,51 +85,13 @@ models: os_name: Android manufacturer: 
Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:14:02Z' + timestamp: '2024-11-09T23:01:37Z' - torchscript_onnx_tflite: - inference_time: 4815.0 - throughput: 207.68431983385256 - estimated_peak_memory_range: - min: 49152 - max: 24694472 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 120 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 120 - job_id: jp0z2z295 - job_status: Passed - torchscript_onnx: - inference_time: 10957.0 - throughput: 91.26585744273068 - estimated_peak_memory_range: - min: 81920 - max: 16493368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 217 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 217 - job_id: jp0z2zl95 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:14:03Z' - - torchscript_onnx_tflite: - inference_time: 4750.0 - throughput: 210.52631578947367 + inference_time: 3358.0 + throughput: 297.79630732578914 estimated_peak_memory_range: min: 638976 - max: 75437312 + max: 77039984 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp8qmqmkp + job_id: jp8q3jnop job_status: Passed torchscript_onnx: - inference_time: 7926.0 - throughput: 126.16704516780217 + inference_time: 7693.0 + throughput: 129.98830105290523 estimated_peak_memory_range: - min: 7602176 - max: 133116448 + min: 7614464 + max: 135588352 primary_compute_unit: NPU precision: int8 layer_info: @@ -152,7 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: jp8qmqzkp + job_id: jpy1489lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -161,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:14:04Z' + timestamp: '2024-11-09T23:01:38Z' - 
torchscript_onnx_tflite: - inference_time: 3360.0 - throughput: 297.6190476190476 + inference_time: 3533.0 + throughput: 283.04557033682426 estimated_peak_memory_range: - min: 634880 - max: 35966752 + min: 8192 + max: 35443344 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jgjv2vevg + job_id: jgkel61ng job_status: Passed torchscript_onnx: inference_time: 7334.0 throughput: 136.35124079629125 estimated_peak_memory_range: - min: 5939200 - max: 67004256 + min: 233472 + max: 60745536 primary_compute_unit: NPU precision: int8 layer_info: @@ -190,7 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: j5q6r63np + job_id: jp0z1onn5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -199,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:14:06Z' + timestamp: '2024-11-09T23:01:39Z' - torchscript_onnx_tflite: - inference_time: 32724.0 - throughput: 30.558611416697225 + inference_time: 31220.0 + throughput: 32.03074951953876 estimated_peak_memory_range: - min: 712704 - max: 46684608 + min: 724992 + max: 46014080 primary_compute_unit: NPU precision: int8 layer_info: @@ -213,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jgkeqeqwg + job_id: j5q674nop job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:13:39Z' + timestamp: '2024-11-09T23:01:16Z' - torchscript_onnx_tflite: - inference_time: 199965.0 - throughput: 5.000875153151801 + inference_time: 205499.0 + throughput: 4.866203728485297 estimated_peak_memory_range: - min: 925696 - max: 3862088 + min: 954368 + max: 3133152 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
120 - job_id: j5q6r6rnp + job_id: jglv0wjm5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:13:40Z' + timestamp: '2024-11-09T23:01:17Z' - torchscript_onnx_tflite: - inference_time: 4744.0 - throughput: 210.79258010118045 + inference_time: 4693.0 + throughput: 213.08331557639036 estimated_peak_memory_range: - min: 49152 - max: 19434184 + min: 688128 + max: 2938544 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jglv2v2j5 + job_id: j56y3okyp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -268,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:13:41Z' + timestamp: '2024-11-09T23:01:18Z' - torchscript_onnx_tflite: - inference_time: 4750.0 - throughput: 210.52631578947367 + inference_time: 4806.0 + throughput: 208.07324178110696 estimated_peak_memory_range: - min: 24576 - max: 190998736 + min: 659456 + max: 2720176 primary_compute_unit: NPU precision: int8 layer_info: @@ -282,7 +244,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jpxk6je85 + job_id: jp3j4oyng job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -291,13 +253,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:47:44Z' + timestamp: '2024-11-09T23:01:19Z' - torchscript_onnx_tflite: - inference_time: 4832.0 - throughput: 206.95364238410596 + inference_time: 4858.0 + throughput: 205.8460271716756 estimated_peak_memory_range: - min: 667648 - max: 3199184 + min: 659456 + max: 2860072 primary_compute_unit: NPU precision: int8 layer_info: @@ -305,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: j5mn62v7p + job_id: jgo21djkp job_status: Passed reference_device_info: name: SA8775 
(Proxy) @@ -314,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:47:45Z' + timestamp: '2024-11-09T23:01:21Z' - torchscript_onnx_tflite: - inference_time: 4690.0 - throughput: 213.21961620469082 + inference_time: 4712.0 + throughput: 212.22410865874363 estimated_peak_memory_range: - min: 40960 - max: 1953552 + min: 638976 + max: 2771272 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jprv2q1kg + job_id: jpv612jr5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -337,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:47:47Z' + timestamp: '2024-11-09T23:01:22Z' - torchscript_onnx_tflite: - inference_time: 9282.0 - throughput: 107.73540185304891 + inference_time: 9219.0 + throughput: 108.47163466753445 estimated_peak_memory_range: - min: 671744 - max: 36240608 + min: 655360 + max: 35910864 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,7 +313,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jgn6myrj5 + job_id: jgjv03jeg job_status: Passed reference_device_info: name: SA8295P ADP @@ -360,13 +322,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:47:46Z' + timestamp: '2024-11-09T23:01:23Z' - torchscript_onnx_tflite: - inference_time: 5822.0 - throughput: 171.76228100309172 + inference_time: 5940.0 + throughput: 168.35016835016836 estimated_peak_memory_range: - min: 638976 - max: 79167872 + min: 917504 + max: 79558800 primary_compute_unit: NPU precision: int8 layer_info: @@ -374,7 +336,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jpv6r6vk5 + job_id: jpedr6jv5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -383,13 +345,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - 
timestamp: '2024-10-27T00:13:45Z' + timestamp: '2024-11-09T23:01:24Z' - torchscript_onnx: - inference_time: 11050.0 - throughput: 90.49773755656109 + inference_time: 11131.0 + throughput: 89.8391878537418 estimated_peak_memory_range: - min: 15089664 - max: 15089664 + min: 13807616 + max: 13807616 primary_compute_unit: NPU precision: int8 layer_info: @@ -397,7 +359,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: jgkeqe3wg + job_id: jp8q3jlop job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -406,4 +368,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:14:05Z' + timestamp: '2024-11-09T23:01:40Z' diff --git a/qai_hub_models/models/ffnet_78s/README.md b/qai_hub_models/models/ffnet_78s/README.md index 15383f97..925b00ee 100644 --- a/qai_hub_models/models/ffnet_78s/README.md +++ b/qai_hub_models/models/ffnet_78s/README.md @@ -5,8 +5,7 @@ FFNet-78S is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-78S found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s). 
diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index a596c19a..5b4ba120 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 23400.0 - throughput: 42.73504273504273 + inference_time: 23430.0 + throughput: 42.680324370465215 estimated_peak_memory_range: - min: 2174976 - max: 4194856 + min: 2560000 + max: 4811104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,52 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5mn6nqdp - job_status: Passed - torchscript_onnx: - inference_time: 33000.0 - throughput: 30.303030303030305 - estimated_peak_memory_range: - min: 610304 - max: 31793648 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 237 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 237 - job_id: jp14w4w8p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:12:47Z' - - torchscript_onnx_tflite: - inference_time: 23717.0 - throughput: 42.163848716110806 - estimated_peak_memory_range: - min: 2138112 - max: 4262248 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jgn6m6lk5 + job_id: j5mnw919p job_status: Passed torchscript_onnx_qnn: - inference_time: 23413.0 - throughput: 42.71131422713877 + inference_time: 23858.0 + throughput: 41.91466174867969 estimated_peak_memory_range: - min: 
24936448 - max: 53744984 + min: 25214976 + max: 48934152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j56yzye6p + job_id: jglv0wdm5 job_status: Passed torchscript_onnx: - inference_time: 32927.0 - throughput: 30.370212895192395 + inference_time: 32750.0 + throughput: 30.53435114503817 estimated_peak_memory_range: - min: 25182208 - max: 115718280 + min: 25268224 + max: 57175120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jgdxqxqrp + job_id: jgdxr6kzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:12:48Z' + timestamp: '2024-11-09T23:00:23Z' - torchscript_onnx_tflite: - inference_time: 21144.0 - throughput: 47.29474082482028 + inference_time: 17145.0 + throughput: 58.326042578011084 estimated_peak_memory_range: - min: 1220608 - max: 136806768 + min: 958464 + max: 136339616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jprv2v80g + job_id: jgn691dq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 21429.0 - throughput: 46.66573335199963 + inference_time: 17555.0 + throughput: 56.96382796923953 estimated_peak_memory_range: - min: 21012480 - max: 62392448 + min: 176345088 + max: 219082496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp3j1jv3g + job_id: j56y3oxyp job_status: Passed torchscript_onnx: - inference_time: 29410.0 - throughput: 34.002040122407344 + inference_time: 24814.0 + throughput: 40.29983074071089 estimated_peak_memory_range: - min: 1110016 - max: 161023328 + min: 31211520 + max: 193788464 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -174,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: j57ylylv5 + job_id: j57yjom95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:12:49Z' + timestamp: '2024-11-09T23:00:24Z' - torchscript_onnx_tflite: - inference_time: 16827.0 - throughput: 59.42829975634397 + inference_time: 14037.0 + throughput: 71.24029351000927 estimated_peak_memory_range: - min: 1798144 - max: 56890240 + min: 2293760 + max: 57645472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5q6r6enp + job_id: jprv4xm7g job_status: Passed torchscript_onnx_qnn: - inference_time: 16899.0 - throughput: 59.17509911829102 + inference_time: 16989.0 + throughput: 58.86161633998469 estimated_peak_memory_range: min: 25178112 - max: 65983472 + max: 66023808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jg9lylywg + job_id: jp3j4odng job_status: Passed torchscript_onnx: - inference_time: 23662.0 - throughput: 42.261854450173274 + inference_time: 20887.0 + throughput: 47.87666969885575 estimated_peak_memory_range: - min: 32432128 - max: 96594816 + min: 29093888 + max: 91607264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jpxk6k635 + job_id: jp4lxe715 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:12:51Z' + timestamp: '2024-11-09T23:00:25Z' - torchscript_onnx_tflite: - inference_time: 23009.0 - throughput: 43.46125429179886 + inference_time: 23314.0 + throughput: 
42.892682508364075 estimated_peak_memory_range: - min: 2555904 - max: 5151592 + min: 2592768 + max: 4509008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp2k9k0rp + job_id: jp2k7oqqp job_status: Passed torchscript_onnx_qnn: - inference_time: 22824.0 - throughput: 43.813529617946024 + inference_time: 23039.0 + throughput: 43.404661660662356 estimated_peak_memory_range: - min: 25260032 - max: 26448784 + min: 25264128 + max: 26454408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgo2n2kqp + job_id: jpv6128r5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:12:39Z' + timestamp: '2024-11-09T23:00:15Z' - torchscript_onnx_tflite: inference_time: 23167.0 throughput: 43.164846548970516 estimated_peak_memory_range: - min: 2547712 - max: 4814064 + min: 2555904 + max: 4584552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpv6rozz5 + job_id: jpy148klp job_status: Passed torchscript_onnx_qnn: - inference_time: 24081.0 - throughput: 41.52651467962294 + inference_time: 23598.0 + throughput: 42.37647258242224 estimated_peak_memory_range: min: 25276416 - max: 26515328 + max: 26721464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j5we3vm45 + job_id: jpedr6qv5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:47:00Z' + timestamp: '2024-11-09T23:00:17Z' - torchscript_onnx_tflite: - inference_time: 23405.0 - throughput: 
42.72591326639607 + inference_time: 23920.0 + throughput: 41.80602006688963 estimated_peak_memory_range: - min: 2568192 - max: 4997816 + min: 2519040 + max: 4465968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -326,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpedw1485 + job_id: jp0z1o8n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23378.0 - throughput: 42.77525879031568 + inference_time: 23417.0 + throughput: 42.70401844813597 estimated_peak_memory_range: - min: 25284608 - max: 26585144 + min: 25260032 + max: 28933592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jg9ly19mg + job_id: jgz3xz6x5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:47:01Z' + timestamp: '2024-11-09T23:00:18Z' - torchscript_onnx_tflite: - inference_time: 23481.0 - throughput: 42.587624036455004 + inference_time: 23660.0 + throughput: 42.265426880811496 estimated_peak_memory_range: min: 2555904 - max: 93697016 + max: 4921888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgz3j9v45 + job_id: jp8q3jdop job_status: Passed torchscript_onnx_qnn: - inference_time: 23253.0 - throughput: 43.00520362963918 + inference_time: 23682.0 + throughput: 42.226163330799764 estimated_peak_memory_range: min: 25284608 - max: 26408464 + max: 26581464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp14wlqnp + job_id: j5wedykm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:47:02Z' + timestamp: 
'2024-11-09T23:00:20Z' + - torchscript_onnx_tflite: + inference_time: 44269.0 + throughput: 22.58917075154171 + estimated_peak_memory_range: + min: 2506752 + max: 45667568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jgkel6wng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 43534.0 + throughput: 22.970551752653098 + estimated_peak_memory_range: + min: 25182208 + max: 31055968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 235 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 235 + job_id: jg9l3or8g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T23:00:21Z' - torchscript_onnx_tflite: - inference_time: 39370.0 - throughput: 25.4000508001016 + inference_time: 41048.0 + throughput: 24.361722861040732 estimated_peak_memory_range: - min: 2650112 - max: 116226016 + min: 1736704 + max: 112903376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgkeqe7wg + job_id: j5q674xop job_status: Passed torchscript_onnx_qnn: - inference_time: 39221.0 - throughput: 25.496545218122943 + inference_time: 39495.0 + throughput: 25.319660716546398 estimated_peak_memory_range: - min: 25223168 - max: 60492960 + min: 14553088 + max: 52025136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j5we3e335 + job_id: jp14do97p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:12:45Z' + timestamp: '2024-11-09T23:00:22Z' - torchscript_onnx_qnn: - inference_time: 23010.0 - throughput: 
43.459365493263796 + inference_time: 23189.0 + throughput: 43.123894950191904 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -440,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jpv6r6rk5 + job_id: jgjv039eg job_status: Passed torchscript_onnx: - inference_time: 36462.0 - throughput: 27.42581317536065 + inference_time: 36432.0 + throughput: 27.448397013614404 estimated_peak_memory_range: - min: 33476608 - max: 33476608 + min: 33333248 + max: 33333248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jp4ldld85 + job_id: jpxk70ql5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:12:50Z' + timestamp: '2024-11-09T23:00:26Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/README.md b/qai_hub_models/models/ffnet_78s_lowres/README.md index cb86eb48..71d78605 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/README.md +++ b/qai_hub_models/models/ffnet_78s_lowres/README.md @@ -5,8 +5,7 @@ FFNet-78S-LowRes is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-78S-LowRes found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s_lowres). 
diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index 518ce8f8..665abf65 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 8323.0 - throughput: 120.14898474107893 + inference_time: 8264.0 + throughput: 121.00677637947724 estimated_peak_memory_range: - min: 643072 - max: 3052232 + min: 155648 + max: 1946592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgdxqxnrp + job_id: jgdxr6mzp job_status: Passed torchscript_onnx_qnn: - inference_time: 8297.0 - throughput: 120.5254911413764 + inference_time: 8274.0 + throughput: 120.86052695189751 estimated_peak_memory_range: - min: 6332416 - max: 25164280 + min: 6336512 + max: 33907264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jp0z2z495 + job_id: jp0z1oxn5 job_status: Passed torchscript_onnx: - inference_time: 8111.0 - throughput: 123.28936012822093 + inference_time: 8079.0 + throughput: 123.77769525931427 estimated_peak_memory_range: - min: 81920 - max: 53635840 + min: 32768 + max: 53563352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jpedwdeo5 + job_id: jpedr60v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - 
timestamp: '2024-10-27T00:11:57Z' + timestamp: '2024-11-09T22:59:33Z' - torchscript_onnx_tflite: - inference_time: 8304.0 - throughput: 120.42389210019267 + inference_time: 5721.0 + throughput: 174.79461632581717 estimated_peak_memory_range: - min: 671744 - max: 2696544 + min: 643072 + max: 64591456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j57yly2v5 + job_id: j57yjo895 job_status: Passed torchscript_onnx_qnn: - inference_time: 8391.0 - throughput: 119.1753068764152 + inference_time: 5849.0 + throughput: 170.96939647803043 estimated_peak_memory_range: - min: 6320128 - max: 31442520 + min: 6307840 + max: 32492432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jp8qmq2kp + job_id: jp8q3jkop job_status: Passed torchscript_onnx: - inference_time: 8218.0 - throughput: 121.68410805548795 + inference_time: 5848.0 + throughput: 170.9986320109439 estimated_peak_memory_range: - min: 81920 - max: 53851472 + min: 7651328 + max: 92716208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jgz3j3oo5 + job_id: jgz3xzqx5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:11:58Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:59:34Z' - torchscript_onnx_tflite: - inference_time: 8799.0 - throughput: 113.64927832708263 + inference_time: 4707.0 + throughput: 212.44954323348205 estimated_peak_memory_range: - min: 655360 - max: 62902416 + min: 65536 + max: 30118688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - 
job_id: jp4ldln85 + job_id: jp4lxe215 job_status: Passed torchscript_onnx_qnn: - inference_time: 8854.0 - throughput: 112.943302462164 + inference_time: 4754.0 + throughput: 210.34917963819942 estimated_peak_memory_range: - min: 6307840 - max: 30464464 + min: 0 + max: 21505408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jgkeqevwg + job_id: jgkel6kng job_status: Passed torchscript_onnx: - inference_time: 6945.0 - throughput: 143.98848092152627 + inference_time: 5862.0 + throughput: 170.590242238144 estimated_peak_memory_range: - min: 1196032 - max: 84915104 + min: 2973696 + max: 48022096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: j5we3e235 + job_id: j5wedy0m5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:11:59Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:59:35Z' - torchscript_onnx_tflite: - inference_time: 4727.0 - throughput: 211.5506663845991 + inference_time: 8143.0 + throughput: 122.80486307257767 estimated_peak_memory_range: - min: 634880 - max: 30780096 + min: 655360 + max: 2322784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpy1j1z8p + job_id: jpxk70zl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5911.0 - throughput: 169.1761123329386 + inference_time: 7645.0 + throughput: 130.80444735120994 estimated_peak_memory_range: - min: 6303744 - max: 28168544 + min: 6373376 + max: 7582680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
236 - job_id: jgjv2vzvg - job_status: Passed - torchscript_onnx: - inference_time: 5838.0 - throughput: 171.29153819801303 - estimated_peak_memory_range: - min: 0 - max: 44452416 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 238 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 238 - job_id: jp14w4y8p + job_id: j5q674dop job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:12:02Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:59:25Z' - torchscript_onnx_tflite: - inference_time: 8205.0 - throughput: 121.8769043266301 + inference_time: 8250.0 + throughput: 121.21212121212122 estimated_peak_memory_range: - min: 638976 - max: 5958344 + min: 684032 + max: 2364352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpxk6k935 + job_id: j5mnw9l9p job_status: Passed torchscript_onnx_qnn: - inference_time: 7617.0 - throughput: 131.2852829197847 + inference_time: 7736.0 + throughput: 129.26577042399174 estimated_peak_memory_range: - min: 6365184 - max: 7989576 + min: 6369280 + max: 7894480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j5q6r60np + job_id: j56y3o9yp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:11:49Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:59:27Z' - torchscript_onnx_tflite: - inference_time: 8338.0 - throughput: 119.93283761093788 + inference_time: 8276.0 + throughput: 120.8313194780087 estimated_peak_memory_range: - 
min: 651264 - max: 2738064 + min: 0 + max: 2047824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpy1jw40p + job_id: jgn691wq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7758.0 - throughput: 128.89920082495487 + inference_time: 7737.0 + throughput: 129.24906294429366 estimated_peak_memory_range: - min: 6332416 - max: 7676688 + min: 6336512 + max: 8058480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jgkeqnrvg + job_id: jp3j4olng job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:46:32Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:59:28Z' - torchscript_onnx_tflite: - inference_time: 8206.0 - throughput: 121.86205215695833 + inference_time: 8265.0 + throughput: 120.99213551119178 estimated_peak_memory_range: - min: 647168 - max: 2750792 + min: 12288 + max: 24655864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp0z2qe05 + job_id: jprv4x77g job_status: Passed torchscript_onnx_qnn: - inference_time: 7754.0 - throughput: 128.96569512509672 + inference_time: 7624.0 + throughput: 131.1647429171039 estimated_peak_memory_range: - min: 6373376 - max: 8094488 + min: 6336512 + max: 7702336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j5q6rk9ep + job_id: jgo21d7kp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:46:33Z' + chipset: SA8650P Proxy + 
timestamp: '2024-11-09T22:59:30Z' - torchscript_onnx_tflite: - inference_time: 8312.0 - throughput: 120.30798845043311 + inference_time: 14374.0 + throughput: 69.57005704744678 estimated_peak_memory_range: - min: 237568 - max: 2327832 + min: 659456 + max: 25750912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp8qm9wqp + job_id: jp2k7ozqp job_status: Passed torchscript_onnx_qnn: - inference_time: 7699.0 - throughput: 129.88699831146903 + inference_time: 13596.0 + throughput: 73.55104442483083 estimated_peak_memory_range: - min: 6336512 - max: 8095640 + min: 737280 + max: 6425312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jglv2ze25 + job_id: jgz3x1lk5 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:46:34Z' + chipset: SA8295P + timestamp: '2024-11-11T13:56:18Z' - torchscript_onnx_tflite: - inference_time: 11995.0 - throughput: 83.36807002917882 + inference_time: 12361.0 + throughput: 80.8996035919424 estimated_peak_memory_range: - min: 675840 - max: 56606928 + min: 638976 + max: 53393856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp2k9kxrp + job_id: jpy148ylp job_status: Passed torchscript_onnx_qnn: - inference_time: 12399.0 - throughput: 80.65166545689168 + inference_time: 12443.0 + throughput: 80.36647110825363 estimated_peak_memory_range: - min: 3174400 - max: 29194224 + min: 6299648 + max: 27530528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jpv6r60k5 + job_id: jgjv036eg job_status: Passed 
reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:11:55Z' + timestamp: '2024-11-09T22:59:32Z' - torchscript_onnx_qnn: - inference_time: 8142.0 - throughput: 122.81994595922377 + inference_time: 8123.0 + throughput: 123.10722639418934 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jglv2v4j5 + job_id: jglv0w9m5 job_status: Passed torchscript_onnx: - inference_time: 8767.0 - throughput: 114.06410402646287 + inference_time: 8829.0 + throughput: 113.26311020500623 estimated_peak_memory_range: - min: 50323456 - max: 50323456 + min: 49397760 + max: 49397760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jg9lyljwg + job_id: jg9l3o78g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:12:00Z' + timestamp: '2024-11-09T22:59:36Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/README.md b/qai_hub_models/models/ffnet_78s_quantized/README.md index f370cdde..43805122 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/README.md +++ b/qai_hub_models/models/ffnet_78s_quantized/README.md @@ -5,8 +5,7 @@ FFNet-78S-Quantized is a "fuss-free network" that segments street scene images with per-pixel classes like road, sidewalk, and pedestrian. Trained on the Cityscapes dataset. -This is based on the implementation of FFNet-78S-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ffnet_78s_quantized). diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index dbfc1b6e..0d4b69ca 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -202,9 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 5b9c7eea..3981ffc9 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 5704.0 - throughput: 175.3155680224404 + inference_time: 5840.0 + throughput: 171.23287671232876 estimated_peak_memory_range: - min: 24576 - max: 2276408 + min: 667648 + max: 2885048 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,52 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgz3j3dk5 + job_id: jp14do18p job_status: Passed torchscript_onnx: - inference_time: 11962.0 - throughput: 83.59806052499582 + inference_time: 11928.0 + throughput: 83.83635144198524 estimated_peak_memory_range: - min: 32768 - max: 24973360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 301 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 301 - job_id: jgz3j33k5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 
2 - timestamp: '2024-10-27T00:11:10Z' - - torchscript_onnx_tflite: - inference_time: 5618.0 - throughput: 177.99928800284798 - estimated_peak_memory_range: - min: 638976 - max: 2544856 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 156 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 156 - job_id: j5we3ee65 - job_status: Passed - torchscript_onnx: - inference_time: 11983.0 - throughput: 83.45155637152632 - estimated_peak_memory_range: - min: 7208960 - max: 10664072 + min: 49152 + max: 384706248 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,7 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: j5we3ew65 + job_id: j5wedyr35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -123,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:11:11Z' + timestamp: '2024-11-09T22:58:47Z' - torchscript_onnx_tflite: - inference_time: 4039.0 - throughput: 247.5860361475613 + inference_time: 4034.0 + throughput: 247.8929102627665 estimated_peak_memory_range: min: 638976 - max: 90421968 + max: 92626608 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jg9lylllg + job_id: jgdxr64rp job_status: Passed torchscript_onnx: - inference_time: 8379.0 - throughput: 119.34598400763814 + inference_time: 8243.0 + throughput: 121.31505519835011 estimated_peak_memory_range: - min: 4640768 - max: 158635296 + min: 7577600 + max: 164106944 primary_compute_unit: NPU precision: int8 layer_info: @@ -152,7 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jg9lyl0lg + job_id: jg9l3oqwg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -161,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:11:12Z' + timestamp: '2024-11-09T22:58:48Z' - 
torchscript_onnx_tflite: - inference_time: 4050.0 - throughput: 246.91358024691357 + inference_time: 4055.0 + throughput: 246.6091245376079 estimated_peak_memory_range: - min: 16384 - max: 39398576 + min: 32768 + max: 39497616 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jprv2vv9g + job_id: j57yjonv5 job_status: Passed torchscript_onnx: - inference_time: 8024.0 - throughput: 124.62612163509472 + inference_time: 7065.0 + throughput: 141.54281670205236 estimated_peak_memory_range: - min: 5881856 - max: 78914448 + min: 2310144 + max: 74626384 primary_compute_unit: NPU precision: int8 layer_info: @@ -190,7 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jgdxqxnep + job_id: jp14dom8p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -199,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:11:15Z' + timestamp: '2024-11-09T22:58:49Z' - torchscript_onnx_tflite: - inference_time: 36559.0 - throughput: 27.35304576164556 + inference_time: 35537.0 + throughput: 28.139685398317248 estimated_peak_memory_range: - min: 741376 - max: 50642032 + min: 36864 + max: 49412192 primary_compute_unit: NPU precision: int8 layer_info: @@ -213,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp14w442p + job_id: jp4lxe485 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:10:46Z' + timestamp: '2024-11-09T22:58:27Z' - torchscript_onnx_tflite: - inference_time: 217127.0 - throughput: 4.6055994878573365 + inference_time: 219116.0 + throughput: 4.563792694280655 estimated_peak_memory_range: - min: 688128 - max: 7701608 + min: 925696 + max: 9391984 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 
+198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgdxqxxep + job_id: jpxk70r35 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:10:47Z' + timestamp: '2024-11-09T22:58:28Z' - torchscript_onnx_tflite: - inference_time: 5665.0 - throughput: 176.522506619594 + inference_time: 5684.0 + throughput: 175.93244194229416 estimated_peak_memory_range: - min: 659456 - max: 2187512 + min: 647168 + max: 2401376 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: j57ylyyl5 + job_id: j5mnw9kdp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -268,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:10:48Z' + timestamp: '2024-11-09T22:58:29Z' - torchscript_onnx_tflite: - inference_time: 5708.0 - throughput: 175.1927119831815 + inference_time: 5742.0 + throughput: 174.15534656913968 estimated_peak_memory_range: - min: 659456 - max: 2845504 + min: 49152 + max: 2238672 primary_compute_unit: NPU precision: int8 layer_info: @@ -282,7 +244,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jg9ly13mg + job_id: jgn691qk5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -291,13 +253,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:46:01Z' + timestamp: '2024-11-09T22:58:30Z' - torchscript_onnx_tflite: - inference_time: 5781.0 - throughput: 172.9804532087874 + inference_time: 5818.0 + throughput: 171.88037126160194 estimated_peak_memory_range: - min: 647168 - max: 2846528 + min: 671744 + max: 3073136 primary_compute_unit: NPU precision: int8 layer_info: @@ -305,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp14wldnp + job_id: jprv4xd0g 
job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -314,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:46:02Z' + timestamp: '2024-11-09T22:58:31Z' - torchscript_onnx_tflite: - inference_time: 5781.0 - throughput: 172.9804532087874 + inference_time: 5788.0 + throughput: 172.77125086385627 estimated_peak_memory_range: - min: 663552 - max: 2676512 + min: 643072 + max: 2734904 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: j57ylwjn5 + job_id: jp2k7odrp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -337,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:46:04Z' + timestamp: '2024-11-09T22:58:32Z' - torchscript_onnx_tflite: - inference_time: 11224.0 - throughput: 89.09479686386315 + inference_time: 11213.0 + throughput: 89.18219923303309 estimated_peak_memory_range: - min: 655360 - max: 40189072 + min: 688128 + max: 40121056 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,7 +313,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgdxq9r6p + job_id: jpy14828p job_status: Passed reference_device_info: name: SA8295P ADP @@ -360,13 +322,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:46:03Z' + timestamp: '2024-11-09T22:58:33Z' - torchscript_onnx_tflite: - inference_time: 7111.0 - throughput: 140.6271972999578 + inference_time: 7082.0 + throughput: 141.2030499858797 estimated_peak_memory_range: - min: 700416 - max: 94377440 + min: 663552 + max: 94745184 primary_compute_unit: NPU precision: int8 layer_info: @@ -374,7 +336,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgn6m66r5 + job_id: jp0z1o995 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -383,13 +345,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:10:53Z' + timestamp: '2024-11-09T22:58:34Z' - torchscript_onnx: - inference_time: 12421.0 - throughput: 80.50881571532082 + inference_time: 12451.0 + throughput: 80.31483414986748 estimated_peak_memory_range: - min: 24506368 - max: 24506368 + min: 24416256 + max: 24416256 primary_compute_unit: NPU precision: int8 layer_info: @@ -397,7 +359,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jp14w422p + job_id: jgdxr6mrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -406,4 +368,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:11:14Z' + timestamp: '2024-11-09T22:58:50Z' diff --git a/qai_hub_models/models/foot_track_net/README.md b/qai_hub_models/models/foot_track_net/README.md index f073400f..94cec09f 100644 --- a/qai_hub_models/models/foot_track_net/README.md +++ b/qai_hub_models/models/foot_track_net/README.md @@ -3,10 +3,9 @@ # [Person-Foot-Detection: Multi-task Human detector](https://aihub.qualcomm.com/models/foot_track_net) -FootTrackNet can detect person and face bounding boxes, head and feet landmark locations and feet visibility. +Real-time multiple person detection with accurate feet localization optimized for mobile and edge. -This is based on the implementation of Person-Foot-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/foot_track_net). 
diff --git a/qai_hub_models/models/foot_track_net/info.yaml b/qai_hub_models/models/foot_track_net/info.yaml index d88a4af6..4e418b01 100644 --- a/qai_hub_models/models/foot_track_net/info.yaml +++ b/qai_hub_models/models/foot_track_net/info.yaml @@ -4,8 +4,7 @@ id: foot_track_net status: public headline: Multi-task Human detector. domain: Computer Vision -description: FootTrackNet can detect person and face bounding boxes, head and feet landmark - locations and feet visibility. +description: Real-time multiple person detection with accurate feet localization optimized for mobile and edge. use_case: Object Detection tags: - real-time diff --git a/qai_hub_models/models/foot_track_net/perf.yaml b/qai_hub_models/models/foot_track_net/perf.yaml index dd1acc2f..7c10ff65 100644 --- a/qai_hub_models/models/foot_track_net/perf.yaml +++ b/qai_hub_models/models/foot_track_net/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,108 +36,56 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Person-Foot-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 3516.0 - throughput: 284.4141069397042 + inference_time: 4944.0 + throughput: 202.26537216828478 estimated_peak_memory_range: - min: 12288 - max: 7704976 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jgo2n6qxp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3610.0 - throughput: 277.0083102493075 - estimated_peak_memory_range: - min: 4210688 - max: 11439216 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 196 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 196 - job_id: j57ylyrl5 - job_status: Passed - torchscript_onnx: - inference_time: 5272.0 - throughput: 
189.6813353566009 - estimated_peak_memory_range: - min: 14729216 - max: 18741344 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 201 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 201 - job_id: jgkeqex2g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:09:41Z' - - torchscript_onnx_tflite: - inference_time: 3560.0 - throughput: 280.8988764044944 - estimated_peak_memory_range: - min: 28672 - max: 133580280 + min: 5132288 + max: 6747728 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jpv6rkxj5 + total_layers: 135 + job_id: jpedr66o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3559.0 - throughput: 280.97780275358247 + inference_time: 5062.0 + throughput: 197.55037534571315 estimated_peak_memory_range: - min: 880640 - max: 9027984 + min: 3723264 + max: 11629080 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jp4ldlrv5 + total_layers: 197 + job_id: j5mnw99dp job_status: Passed torchscript_onnx: - inference_time: 5291.0 - throughput: 189.000189000189 + inference_time: 5472.0 + throughput: 182.7485380116959 estimated_peak_memory_range: - min: 14733312 - max: 63997368 + min: 14712832 + max: 18896800 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 201 + layers_on_npu: 202 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 201 - job_id: j5q6r6q4p + total_layers: 202 + job_id: j56y3oo6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,51 +94,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:09:42Z' + timestamp: '2024-11-09T22:57:19Z' - 
torchscript_onnx_tflite: - inference_time: 2877.0 - throughput: 347.58428919012863 + inference_time: 3443.0 + throughput: 290.4443799012489 estimated_peak_memory_range: min: 12288 - max: 57789920 + max: 60720592 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jgjv2n4xg + total_layers: 135 + job_id: jgz3xzzo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3037.0 - throughput: 329.27230819888047 + inference_time: 3564.0 + throughput: 280.58361391694723 estimated_peak_memory_range: - min: 3702784 - max: 24272352 + min: 0 + max: 22059360 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jpxk6ko15 + total_layers: 197 + job_id: jgn6911k5 job_status: Passed torchscript_onnx: - inference_time: 4278.0 - throughput: 233.75409069658718 + inference_time: 4096.0 + throughput: 244.140625 estimated_peak_memory_range: - min: 14745600 - max: 83403936 + min: 20451328 + max: 92085504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 201 + layers_on_npu: 202 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 201 - job_id: jglv2vm85 + total_layers: 202 + job_id: jp3j4oo3g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,51 +147,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:09:43Z' + timestamp: '2024-11-09T22:57:20Z' - torchscript_onnx_tflite: - inference_time: 2514.0 - throughput: 397.77247414478916 + inference_time: 3601.0 + throughput: 277.700638711469 estimated_peak_memory_range: min: 8192 - max: 29386608 + max: 30199456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jgdxqx1ep + total_layers: 135 + job_id: j5wedyy35 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 2668.0 - throughput: 374.8125937031484 + inference_time: 3635.0 + throughput: 275.1031636863824 estimated_peak_memory_range: - min: 1556480 - max: 19052928 + min: 0 + max: 18787840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jp8qmqyxp + total_layers: 197 + job_id: jprv4xx0g job_status: Passed torchscript_onnx: - inference_time: 3681.0 - throughput: 271.66530834012497 + inference_time: 3869.0 + throughput: 258.46471956577926 estimated_peak_memory_range: - min: 0 - max: 35628448 + min: 20152320 + max: 56250880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 201 + layers_on_npu: 202 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 201 - job_id: jp3j1j0lg + total_layers: 202 + job_id: jgo21ddqp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,36 +200,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:09:45Z' + timestamp: '2024-11-09T22:57:21Z' - torchscript_onnx_tflite: - inference_time: 3410.0 - throughput: 293.2551319648094 + inference_time: 4893.0 + throughput: 204.37359493153485 estimated_peak_memory_range: - min: 12288 - max: 1578456 + min: 5087232 + max: 45378352 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jpedwm315 + total_layers: 135 + job_id: jg9l3oowg job_status: Passed torchscript_onnx_qnn: - inference_time: 3325.0 - throughput: 300.7518796992481 + inference_time: 4778.0 + throughput: 209.2925910422771 estimated_peak_memory_range: - min: 2654208 - max: 4249192 + min: 3731456 + max: 5499776 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: j5mn6nxwp + total_layers: 197 + 
job_id: jp2k7oorp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,36 +238,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:09:33Z' + timestamp: '2024-11-09T22:57:11Z' - torchscript_onnx_tflite: - inference_time: 3385.0 - throughput: 295.4209748892171 + inference_time: 5038.0 + throughput: 198.4914648670107 estimated_peak_memory_range: - min: 12288 - max: 5531248 + min: 5103616 + max: 59056384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jgkeqnlvg + total_layers: 135 + job_id: jp14doo8p job_status: Passed torchscript_onnx_qnn: - inference_time: 3382.0 - throughput: 295.68302779420463 + inference_time: 4845.0 + throughput: 206.39834881320948 estimated_peak_memory_range: - min: 3731456 - max: 8383072 + min: 3756032 + max: 8280632 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: j56yzj3np + total_layers: 197 + job_id: jp0z1oo95 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,36 +276,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:45:04Z' + timestamp: '2024-11-09T22:57:13Z' - torchscript_onnx_tflite: - inference_time: 3422.0 - throughput: 292.22676797194623 + inference_time: 4950.0 + throughput: 202.02020202020202 estimated_peak_memory_range: - min: 12288 - max: 11957208 + min: 5087232 + max: 133286704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: j5q6rk7ep + total_layers: 135 + job_id: jgdxr66rp job_status: Passed torchscript_onnx_qnn: - inference_time: 3375.0 - throughput: 296.2962962962963 + inference_time: 4914.0 + throughput: 203.5002035002035 estimated_peak_memory_range: - 
min: 2465792 - max: 4351792 + min: 3764224 + max: 5325824 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jp3j134mg + total_layers: 197 + job_id: jp8q3jjkp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,36 +314,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:45:05Z' + timestamp: '2024-11-09T22:57:14Z' - torchscript_onnx_tflite: - inference_time: 3430.0 - throughput: 291.5451895043732 + inference_time: 5010.0 + throughput: 199.6007984031936 estimated_peak_memory_range: - min: 12288 - max: 8057768 + min: 4227072 + max: 7453256 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jglv2z025 + total_layers: 135 + job_id: j57yjoov5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3338.0 - throughput: 299.58058717795086 + inference_time: 4948.0 + throughput: 202.1018593371059 estimated_peak_memory_range: - min: 2220032 - max: 4760096 + min: 3743744 + max: 6408552 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jgo2n011p + total_layers: 197 + job_id: jgkel66wg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,36 +352,74 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:45:06Z' + timestamp: '2024-11-09T22:57:15Z' + - torchscript_onnx_tflite: + inference_time: 8458.0 + throughput: 118.23126034523528 + estimated_peak_memory_range: + min: 5099520 + max: 33983744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 135 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 135 + job_id: jp4lxee85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8814.0 + 
throughput: 113.45586566825504 + estimated_peak_memory_range: + min: 49152 + max: 6014480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 197 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 197 + job_id: j5q6744np + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:57:16Z' - torchscript_onnx_tflite: - inference_time: 5635.0 - throughput: 177.4622892635315 + inference_time: 7095.0 + throughput: 140.94432699083862 estimated_peak_memory_range: - min: 5107712 - max: 61099264 + min: 5111808 + max: 62483216 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 134 + layers_on_npu: 135 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 134 - job_id: jp14w4z2p + total_layers: 135 + job_id: jpxk70035 job_status: Passed torchscript_onnx_qnn: - inference_time: 5780.0 - throughput: 173.01038062283737 + inference_time: 7526.0 + throughput: 132.87270794578794 estimated_peak_memory_range: - min: 4153344 - max: 24285264 + min: 3702784 + max: 28863568 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jp0z2z065 + total_layers: 197 + job_id: jglv0wwj5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,36 +428,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:09:39Z' + timestamp: '2024-11-09T22:57:17Z' - torchscript_onnx_qnn: - inference_time: 3662.0 - throughput: 273.07482250136536 + inference_time: 5363.0 + throughput: 186.46280067126608 estimated_peak_memory_range: min: 3690496 max: 3690496 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 196 + layers_on_npu: 197 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 196 - job_id: jgn6m6vr5 + total_layers: 197 + job_id: jpy14888p job_status: Passed 
torchscript_onnx: - inference_time: 5641.0 - throughput: 177.27353306151392 + inference_time: 5875.0 + throughput: 170.2127659574468 estimated_peak_memory_range: - min: 17436672 - max: 17436672 + min: 17424384 + max: 17424384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 201 + layers_on_npu: 202 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 201 - job_id: j56yzy40p + total_layers: 202 + job_id: jpv612mk5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:09:44Z' + timestamp: '2024-11-09T22:57:22Z' diff --git a/qai_hub_models/models/foot_track_net_quantized/README.md b/qai_hub_models/models/foot_track_net_quantized/README.md index 0a4918ac..050294e8 100644 --- a/qai_hub_models/models/foot_track_net_quantized/README.md +++ b/qai_hub_models/models/foot_track_net_quantized/README.md @@ -1,12 +1,11 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [FootTrackNet_Quantized: Multi-task Human detector](https://aihub.qualcomm.com/models/foot_track_net_quantized) +# [Person-Foot-Detection-Quantized: Multi-task Human detector](https://aihub.qualcomm.com/models/foot_track_net_quantized) -FootTrackNet can detect person and face bounding boxes, head and feet landmark locations and feet visibility. +Real-time multiple person detection with accurate feet localization optimized for mobile and edge. -This is based on the implementation of FootTrackNet_Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/foot_track_net_quantized). 
@@ -46,7 +45,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -* The license for the original implementation of FootTrackNet_Quantized can be found +* The license for the original implementation of Person-Foot-Detection-Quantized can be found [here](https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE). * The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) diff --git a/qai_hub_models/models/foot_track_net_quantized/export.py b/qai_hub_models/models/foot_track_net_quantized/export.py index fdf1da9c..09c281be 100644 --- a/qai_hub_models/models/foot_track_net_quantized/export.py +++ b/qai_hub_models/models/foot_track_net_quantized/export.py @@ -93,7 +93,7 @@ def export_model( if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "foot_track_net_quantized", - "FootTrackNet_Quantized", + "Person-Foot-Detection-Quantized", device or f"Device (Chipset {chipset})", skip_profiling, skip_inferencing, diff --git a/qai_hub_models/models/foot_track_net_quantized/info.yaml b/qai_hub_models/models/foot_track_net_quantized/info.yaml index 20492d33..aa174b5b 100644 --- a/qai_hub_models/models/foot_track_net_quantized/info.yaml +++ b/qai_hub_models/models/foot_track_net_quantized/info.yaml @@ -1,11 +1,10 @@ -name: FootTrackNet_Quantized +name: Person-Foot-Detection-Quantized # id must match with the model dir name in qai_hub_models id: foot_track_net_quantized status: public headline: Multi-task Human detector. domain: Computer Vision -description: FootTrackNet can detect person and face bounding boxes, head and feet landmark - locations and feet visibility. +description: Real-time multiple person detection with accurate feet localization optimized for mobile and edge. 
use_case: Object Detection tags: - real-time diff --git a/qai_hub_models/models/foot_track_net_quantized/perf.yaml b/qai_hub_models/models/foot_track_net_quantized/perf.yaml index 4fe3cae5..66e8244e 100644 --- a/qai_hub_models/models/foot_track_net_quantized/perf.yaml +++ b/qai_hub_models/models/foot_track_net_quantized/perf.yaml @@ -20,6 +20,8 @@ aggregated: - Snapdragon X Plus 8-Core CRD - QCS6490 (Proxy) - RB3 Gen 2 (Proxy) + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -36,6 +38,7 @@ aggregated: - Snapdragon® X Elite - Snapdragon® X Plus 8-Core - QCS6490 Proxy + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -43,14 +46,14 @@ aggregated: - SA8650P Proxy - SA8775P Proxy models: -- name: FootTrackNet_Quantized +- name: Person-Foot-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: inference_time: 1150.0 throughput: 869.5652173913044 estimated_peak_memory_range: - min: 40960 - max: 101515400 + min: 12288 + max: 1462888 primary_compute_unit: NPU precision: int8 layer_info: @@ -58,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpv6v1mz5 + job_id: jglv0wxj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1296.0 - throughput: 771.604938271605 + inference_time: 1287.0 + throughput: 777.000777000777 estimated_peak_memory_range: - min: 16384 - max: 91870248 + min: 946176 + max: 8008984 primary_compute_unit: NPU precision: int8 layer_info: @@ -73,14 +76,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgjve0y1g + job_id: jgdxr6drp job_status: Passed torchscript_onnx: - inference_time: 1677.0 - throughput: 596.3029218843172 + inference_time: 1637.0 + throughput: 610.8735491753207 estimated_peak_memory_range: min: 12288 - max: 3985600 + max: 3954640 primary_compute_unit: NPU precision: int8 layer_info: @@ -88,7 +91,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jp2kl7d6p + job_id: jgkel62wg 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -97,13 +100,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:45:59Z' - - torchscript_onnx_qnn: - inference_time: 1238.0 - throughput: 807.7544426494346 + timestamp: '2024-11-09T22:56:35Z' + - torchscript_onnx_tflite: + inference_time: 798.0 + throughput: 1253.1328320802006 estimated_peak_memory_range: - min: 966656 - max: 2427224 + min: 12288 + max: 51169280 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: j56y3o76p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 882.0 + throughput: 1133.7868480725624 + estimated_peak_memory_range: + min: 937984 + max: 26552016 primary_compute_unit: NPU precision: int8 layer_info: @@ -111,30 +129,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpedkrx85 + job_id: j57yjoev5 job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:45:49Z' - - torchscript_onnx_qnn: - inference_time: 839.0 - throughput: 1191.8951132300358 + torchscript_onnx: + inference_time: 1133.0 + throughput: 882.61253309797 estimated_peak_memory_range: min: 0 - max: 9053104 + max: 59599552 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 198 + layers_on_npu: 139 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 198 - job_id: j5weqdz45 + total_layers: 139 + job_id: j5q674lnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -143,13 +153,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:45:50Z' - - torchscript_onnx_qnn: - inference_time: 902.0 - throughput: 1108.6474501108648 + timestamp: '2024-11-09T22:56:36Z' + - torchscript_onnx_tflite: + 
inference_time: 684.0 + throughput: 1461.9883040935672 + estimated_peak_memory_range: + min: 8192 + max: 34870384 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jp3j4o93g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 831.0 + throughput: 1203.3694344163657 estimated_peak_memory_range: min: 0 - max: 22053632 + max: 22099952 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +182,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jg9lw32mg + job_id: jp4lxey85 + job_status: Passed + torchscript_onnx: + inference_time: 1121.0 + throughput: 892.0606601248885 + estimated_peak_memory_range: + min: 0 + max: 41725952 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: jglv0wyj5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +206,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:45:51Z' - - torchscript_onnx_qnn: - inference_time: 6706.0 - throughput: 149.1201908738443 + timestamp: '2024-11-09T22:56:37Z' + - torchscript_onnx_tflite: + inference_time: 5372.0 + throughput: 186.15040953090096 estimated_peak_memory_range: - min: 946176 - max: 9298960 + min: 0 + max: 37685984 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jgo21drqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6758.0 + throughput: 147.9727730097662 + estimated_peak_memory_range: + min: 937984 + max: 9010320 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp14ed1np + job_id: jpxk70l35 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -189,13 
+244,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:45:52Z' - - torchscript_onnx_qnn: - inference_time: 1230.0 - throughput: 813.0081300813008 + timestamp: '2024-11-09T22:56:26Z' + - torchscript_onnx_tflite: + inference_time: 26589.0 + throughput: 37.6095377787807 estimated_peak_memory_range: - min: 946176 - max: 2388032 + min: 1343488 + max: 8701392 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jpv612lk5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8250 Proxy + timestamp: '2024-11-09T22:56:15Z' + - torchscript_onnx_tflite: + inference_time: 1137.0 + throughput: 879.5074758135444 + estimated_peak_memory_range: + min: 16384 + max: 1392488 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jgjv03rvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1226.0 + throughput: 815.6606851549756 + estimated_peak_memory_range: + min: 954368 + max: 2215384 primary_compute_unit: NPU precision: int8 layer_info: @@ -203,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgdxor46p + job_id: j5mnw90dp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -212,13 +305,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:45:53Z' - - torchscript_onnx_qnn: - inference_time: 1257.0 - throughput: 795.5449482895783 + timestamp: '2024-11-09T22:56:27Z' + - torchscript_onnx_tflite: + inference_time: 1152.0 + throughput: 868.0555555555555 + estimated_peak_memory_range: + min: 12288 + max: 5173456 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 146 + job_id: jpedr67o5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1239.0 + throughput: 807.1025020177563 estimated_peak_memory_range: - min: 950272 - max: 2212920 + min: 954368 + max: 3685344 primary_compute_unit: NPU precision: int8 layer_info: @@ -226,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp4lvx425 + job_id: jprv4xl0g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -235,13 +343,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-31T14:45:55Z' - - torchscript_onnx_qnn: - inference_time: 1245.0 - throughput: 803.2128514056225 + timestamp: '2024-11-09T22:56:29Z' + - torchscript_onnx_tflite: + inference_time: 1150.0 + throughput: 869.5652173913044 estimated_peak_memory_range: - min: 946176 - max: 2383424 + min: 12288 + max: 1402688 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jgz3xzlo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1240.0 + throughput: 806.4516129032259 + estimated_peak_memory_range: + min: 954368 + max: 2222440 primary_compute_unit: NPU precision: int8 layer_info: @@ -249,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpxky7r85 + job_id: jp2k7orrp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -258,13 +381,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-31T14:45:56Z' - - torchscript_onnx_qnn: - inference_time: 1242.0 - throughput: 805.1529790660226 + timestamp: '2024-11-09T22:56:30Z' + - torchscript_onnx_tflite: + inference_time: 1189.0 + throughput: 841.0428931875525 estimated_peak_memory_range: - min: 946176 - max: 2386968 + min: 12288 + max: 1472024 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 
146 + job_id: j5wedyl35 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1268.0 + throughput: 788.6435331230284 + estimated_peak_memory_range: + min: 983040 + max: 2402456 primary_compute_unit: NPU precision: int8 layer_info: @@ -272,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: j5mn3wk7p + job_id: jpy148o8p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -281,13 +419,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-31T14:45:57Z' - - torchscript_onnx_qnn: - inference_time: 2453.0 - throughput: 407.6640847941296 + timestamp: '2024-11-09T22:56:31Z' + - torchscript_onnx_tflite: + inference_time: 2253.0 + throughput: 443.85264092321347 + estimated_peak_memory_range: + min: 12288 + max: 33290160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jg9l3ozwg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2367.0 + throughput: 422.4757076468103 estimated_peak_memory_range: - min: 962560 - max: 6626768 + min: 0 + max: 5694208 primary_compute_unit: NPU precision: int8 layer_info: @@ -295,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgn639qj5 + job_id: jp0z1om95 job_status: Passed reference_device_info: name: SA8295P ADP @@ -304,13 +457,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-31T14:45:58Z' - - torchscript_onnx_qnn: - inference_time: 1561.0 - throughput: 640.6149903907751 + timestamp: '2024-11-09T22:56:33Z' + - torchscript_onnx_tflite: + inference_time: 1370.0 + throughput: 729.92700729927 + estimated_peak_memory_range: + min: 12288 + max: 51152912 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jp14don8p + job_status: Passed + torchscript_onnx_qnn: + 
inference_time: 1570.0 + throughput: 636.9426751592357 estimated_peak_memory_range: min: 937984 - max: 28542576 + max: 28550320 primary_compute_unit: NPU precision: int8 layer_info: @@ -318,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jprve4dkg + job_id: jp8q3jekp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -327,7 +495,7 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:45:59Z' + timestamp: '2024-11-09T22:56:34Z' - torchscript_onnx_qnn: inference_time: 1421.0 throughput: 703.7297677691766 @@ -341,7 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: j57yxjnn5 + job_id: jgn691zk5 + job_status: Passed + torchscript_onnx: + inference_time: 1740.0 + throughput: 574.7126436781609 + estimated_peak_memory_range: + min: 8556544 + max: 8556544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: j56y3o86p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -350,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:45:53Z' + timestamp: '2024-11-09T22:56:38Z' diff --git a/qai_hub_models/models/gear_guard_net/README.md b/qai_hub_models/models/gear_guard_net/README.md index cd7ecdda..9f686ed7 100644 --- a/qai_hub_models/models/gear_guard_net/README.md +++ b/qai_hub_models/models/gear_guard_net/README.md @@ -5,8 +5,7 @@ Detect if a person is wearing personal protective equipments (PPE) in real-time. -This is based on the implementation of PPE-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/gear_guard_net). diff --git a/qai_hub_models/models/gear_guard_net/perf.yaml b/qai_hub_models/models/gear_guard_net/perf.yaml index 1d00750f..29cec2e6 100644 --- a/qai_hub_models/models/gear_guard_net/perf.yaml +++ b/qai_hub_models/models/gear_guard_net/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: PPE-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 683.0 + throughput: 1464.1288433382138 estimated_peak_memory_range: - min: 0 - max: 228919048 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 77 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 77 - job_id: j5q6rqk4p - job_status: Passed - torchscript_onnx: - inference_time: 1127.0 - throughput: 887.3114463176574 - estimated_peak_memory_range: - min: 36864 - max: 213461360 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 108 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 108 - job_id: jp2k9yn4p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:08:58Z' - - torchscript_onnx_tflite: - inference_time: 668.0 - throughput: 1497.005988023952 - estimated_peak_memory_range: - min: 24576 - max: 2169336 + min: 12288 + max: 2060256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,37 +55,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jglv2mz85 + job_id: jglv0wl85 job_status: Passed torchscript_onnx_qnn: - inference_time: 
957.0 - throughput: 1044.932079414838 + inference_time: 754.0 + throughput: 1326.2599469496022 estimated_peak_memory_range: min: 16384 - max: 50864648 + max: 62032200 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jg9lynelg + total_layers: 103 + job_id: jg9l3omlg job_status: Passed torchscript_onnx: - inference_time: 1130.0 - throughput: 884.9557522123894 + inference_time: 1076.0 + throughput: 929.368029739777 estimated_peak_memory_range: min: 12288 - max: 15376432 + max: 15467704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 108 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 108 - job_id: jpy1j307p + total_layers: 107 + job_id: j5mnw9mdp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:08:59Z' + timestamp: '2024-11-09T22:55:39Z' - torchscript_onnx_tflite: - inference_time: 569.0 - throughput: 1757.469244288225 + inference_time: 495.0 + throughput: 2020.20202020202 estimated_peak_memory_range: min: 16384 - max: 43653744 + max: 43873504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,37 +108,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j56yz4j0p + job_id: j56y3ow0p job_status: Passed torchscript_onnx_qnn: - inference_time: 758.0 - throughput: 1319.2612137203166 + inference_time: 542.0 + throughput: 1845.018450184502 estimated_peak_memory_range: - min: 753664 - max: 18169760 + min: 0 + max: 16629360 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp14wzx2p + total_layers: 103 + job_id: jp14doj2p job_status: Passed torchscript_onnx: - inference_time: 971.0 - throughput: 1029.8661174047375 + inference_time: 845.0 + 
throughput: 1183.4319526627219 estimated_peak_memory_range: - min: 540672 - max: 49033856 + min: 0 + max: 49144480 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 108 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 108 - job_id: jp0z20765 + total_layers: 107 + job_id: jgn691nk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:09:00Z' + timestamp: '2024-11-09T22:55:40Z' - torchscript_onnx_tflite: - inference_time: 505.0 - throughput: 1980.1980198019803 + inference_time: 503.0 + throughput: 1988.0715705765408 estimated_peak_memory_range: min: 8192 - max: 22981264 + max: 22616832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,37 +161,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jgz3jd9k5 + job_id: jp3j4o6lg job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 527.0 + throughput: 1897.5332068311195 estimated_peak_memory_range: min: 749568 - max: 14912128 + max: 13975840 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jprv23j9g + total_layers: 103 + job_id: jgdxr63ep job_status: Passed torchscript_onnx: - inference_time: 745.0 - throughput: 1342.2818791946308 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: min: 0 - max: 25178400 + max: 24673760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 108 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 108 - job_id: jgkeqxm2g + total_layers: 107 + job_id: jprv4x00g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: 
'2024-10-27T00:09:03Z' + timestamp: '2024-11-09T22:55:41Z' - torchscript_onnx_tflite: - inference_time: 665.0 - throughput: 1503.7593984962407 + inference_time: 670.0 + throughput: 1492.5373134328358 estimated_peak_memory_range: - min: 16384 - max: 1315640 + min: 12288 + max: 1285112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,22 +214,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jp3j103lg + job_id: jgo21d8xp job_status: Passed torchscript_onnx_qnn: - inference_time: 895.0 - throughput: 1117.31843575419 + inference_time: 727.0 + throughput: 1375.515818431912 estimated_peak_memory_range: - min: 774144 - max: 2039456 + min: 770048 + max: 2462456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgdxq1lep + total_layers: 103 + job_id: j5wedy735 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:08:51Z' + timestamp: '2024-11-09T22:55:31Z' - torchscript_onnx_tflite: - inference_time: 675.0 - throughput: 1481.4814814814815 + inference_time: 673.0 + throughput: 1485.8841010401188 estimated_peak_memory_range: - min: 16384 - max: 2169616 + min: 12288 + max: 1539744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,22 +252,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jp4ldow25 + job_id: jpv6127j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 900.0 - throughput: 1111.111111111111 + inference_time: 726.0 + throughput: 1377.4104683195592 estimated_peak_memory_range: - min: 786432 - max: 2095672 + min: 761856 + max: 2030720 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgn6myej5 + total_layers: 103 + job_id: jp14doj8p 
job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:44:44Z' + timestamp: '2024-11-09T22:55:34Z' - torchscript_onnx_tflite: - inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: - min: 24576 - max: 17604560 + min: 32768 + max: 140277296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -326,22 +290,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jpxk6j185 + job_id: jgjv03qxg job_status: Passed torchscript_onnx_qnn: - inference_time: 907.0 - throughput: 1102.5358324145534 + inference_time: 727.0 + throughput: 1375.515818431912 estimated_peak_memory_range: - min: 28672 - max: 1339304 + min: 770048 + max: 2015472 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jprv2qykg + total_layers: 103 + job_id: jgdxr63rp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:44:45Z' + timestamp: '2024-11-09T22:55:35Z' - torchscript_onnx_tflite: - inference_time: 679.0 - throughput: 1472.7540500736377 + inference_time: 684.0 + throughput: 1461.9883040935672 estimated_peak_memory_range: min: 24576 - max: 253397152 + max: 1482952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,22 +328,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j5mn62z7p + job_id: jpedr6y15 job_status: Passed torchscript_onnx_qnn: - inference_time: 913.0 - throughput: 1095.290251916758 + inference_time: 729.0 + throughput: 1371.7421124828531 estimated_peak_memory_range: - min: 679936 - max: 2217480 + min: 782336 + max: 2323328 primary_compute_unit: NPU precision: fp16 layer_info: - 
layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp2k96m6p + total_layers: 103 + job_id: j57yjo4v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:44:46Z' + timestamp: '2024-11-09T22:55:36Z' + - torchscript_onnx_tflite: + inference_time: 1813.0 + throughput: 551.5719801434087 + estimated_peak_memory_range: + min: 16384 + max: 18292848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 77 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 77 + job_id: jgz3xznk5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2031.0 + throughput: 492.36829148202855 + estimated_peak_memory_range: + min: 0 + max: 5874208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: jp4lxe185 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:55:37Z' - torchscript_onnx_tflite: - inference_time: 1415.0 - throughput: 706.7137809187279 + inference_time: 1411.0 + throughput: 708.7172218284904 estimated_peak_memory_range: - min: 118784 - max: 41744560 + min: 135168 + max: 38560064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,22 +404,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jpedwm115 + job_id: j5wedy765 job_status: Passed torchscript_onnx_qnn: - inference_time: 1780.0 - throughput: 561.7977528089888 + inference_time: 1485.0 + throughput: 673.4006734006734 estimated_peak_memory_range: min: 753664 - max: 17368112 + max: 17476256 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: 
jgn6mv8r5 + total_layers: 103 + job_id: jpxk70435 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,36 +428,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:08:56Z' + timestamp: '2024-11-09T22:55:38Z' - torchscript_onnx_qnn: - inference_time: 1039.0 - throughput: 962.4639076034649 + inference_time: 860.0 + throughput: 1162.7906976744187 estimated_peak_memory_range: min: 737280 max: 737280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j57ylr3l5 + total_layers: 103 + job_id: jg9l3omwg job_status: Passed torchscript_onnx: - inference_time: 1267.0 - throughput: 789.2659826361484 + inference_time: 1172.0 + throughput: 853.2423208191126 estimated_peak_memory_range: - min: 13324288 - max: 13324288 + min: 14602240 + max: 14602240 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 108 + layers_on_npu: 107 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 108 - job_id: jp8qmyvxp + total_layers: 107 + job_id: jp2k7owrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:09:02Z' + timestamp: '2024-11-09T22:55:42Z' diff --git a/qai_hub_models/models/gear_guard_net_quantized/README.md b/qai_hub_models/models/gear_guard_net_quantized/README.md index 9cdc45d8..fe5f83b1 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/README.md +++ b/qai_hub_models/models/gear_guard_net_quantized/README.md @@ -5,8 +5,7 @@ Detect if a person is wearing personal protective equipments (PPE) in real-time. -This is based on the implementation of PPE-Detection-Quantized found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/gear_guard_net_quantized). diff --git a/qai_hub_models/models/gear_guard_net_quantized/demo.py b/qai_hub_models/models/gear_guard_net_quantized/demo.py index a76fa898..6a4afaef 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/demo.py +++ b/qai_hub_models/models/gear_guard_net_quantized/demo.py @@ -4,8 +4,8 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.body_detection.app import BodyDetectionApp from qai_hub_models.models._shared.body_detection.demo import BodyDetectionDemo +from qai_hub_models.models.gear_guard_net.model import MODEL_ASSET_VERSION from qai_hub_models.models.gear_guard_net_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, GearGuardNetQuantizable, ) diff --git a/qai_hub_models/models/gear_guard_net_quantized/model.py b/qai_hub_models/models/gear_guard_net_quantized/model.py index d80e0b7c..f9cd75ac 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/model.py +++ b/qai_hub_models/models/gear_guard_net_quantized/model.py @@ -20,7 +20,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 +MODEL_ASSET_VERSION = 2 DEFAULT_ENCODINGS = "encodings.json" diff --git a/qai_hub_models/models/gear_guard_net_quantized/perf.yaml b/qai_hub_models/models/gear_guard_net_quantized/perf.yaml index 843dc5ae..1bfea50b 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/perf.yaml +++ b/qai_hub_models/models/gear_guard_net_quantized/perf.yaml @@ -49,102 +49,49 @@ models: - name: PPE-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 471.0 - throughput: 2123.1422505307855 + 
inference_time: 247.0 + throughput: 4048.582995951417 estimated_peak_memory_range: - min: 12288 - max: 1452824 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 89 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 89 - job_id: jp2k9y8mp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 341.0 - throughput: 2932.551319648094 - estimated_peak_memory_range: - min: 16384 - max: 13420752 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 104 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 104 - job_id: jpedwm905 - job_status: Passed - torchscript_onnx: - inference_time: 620.0 - throughput: 1612.9032258064517 - estimated_peak_memory_range: - min: 12288 - max: 8800592 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 118 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 118 - job_id: jpxk6oj15 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:08:16Z' - - torchscript_onnx_tflite: - inference_time: 476.0 - throughput: 2100.840336134454 - estimated_peak_memory_range: - min: 53248 - max: 126310936 + min: 0 + max: 1424272 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jpy1j3e4p + total_layers: 86 + job_id: jprv4xn9g job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 310.0 + throughput: 3225.8064516129034 estimated_peak_memory_range: - min: 12288 - max: 13275752 + min: 28672 + max: 13535048 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgz3jde65 + total_layers: 103 + job_id: jgjv03wxg job_status: Passed torchscript_onnx: - 
inference_time: 625.0 - throughput: 1600.0 + inference_time: 634.0 + throughput: 1577.2870662460568 estimated_peak_memory_range: min: 12288 - max: 8756872 + max: 9218976 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 118 + layers_on_npu: 117 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 118 - job_id: j5mn6x2wp + total_layers: 117 + job_id: jgn691kr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,51 +100,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:08:17Z' + timestamp: '2024-11-09T22:54:58Z' - torchscript_onnx_tflite: - inference_time: 338.0 - throughput: 2958.579881656805 + inference_time: 182.0 + throughput: 5494.505494505494 estimated_peak_memory_range: min: 12288 - max: 44730352 + max: 42729616 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jp0z20ye5 + total_layers: 86 + job_id: jp2k7ov4p job_status: Passed torchscript_onnx_qnn: - inference_time: 256.0 - throughput: 3906.25 + inference_time: 231.0 + throughput: 4329.004329004329 estimated_peak_memory_range: - min: 196608 - max: 15807920 + min: 200704 + max: 15219872 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j5we36oj5 + total_layers: 103 + job_id: jpedr6l15 job_status: Passed torchscript_onnx: - inference_time: 450.0 - throughput: 2222.222222222222 + inference_time: 441.0 + throughput: 2267.573696145125 estimated_peak_memory_range: - min: 196608 - max: 53545312 + min: 0 + max: 52545840 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 118 + layers_on_npu: 117 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 118 - job_id: jgn6mvyr5 + total_layers: 117 + job_id: jprv4xw9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,51 
+153,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:08:18Z' + timestamp: '2024-11-09T22:54:59Z' - torchscript_onnx_tflite: - inference_time: 337.0 - throughput: 2967.359050445104 + inference_time: 185.0 + throughput: 5405.405405405405 estimated_peak_memory_range: min: 8192 - max: 23225792 + max: 21538592 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jpv6rk3m5 + total_layers: 86 + job_id: jpy14877p job_status: Passed torchscript_onnx_qnn: - inference_time: 254.0 - throughput: 3937.0078740157483 + inference_time: 228.0 + throughput: 4385.964912280701 estimated_peak_memory_range: min: 0 - max: 14006880 + max: 12924880 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j57ylrwl5 + total_layers: 103 + job_id: jgz3xz4k5 job_status: Passed torchscript_onnx: - inference_time: 487.0 - throughput: 2053.388090349076 + inference_time: 484.0 + throughput: 2066.115702479339 estimated_peak_memory_range: min: 0 - max: 30198496 + max: 29453488 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 118 + layers_on_npu: 117 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 118 - job_id: jp2k9y64p + total_layers: 117 + job_id: jp2k7oe4p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,36 +206,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:08:20Z' + timestamp: '2024-11-09T22:55:00Z' - torchscript_onnx_tflite: - inference_time: 3737.0 - throughput: 267.5943270002676 + inference_time: 1226.0 + throughput: 815.6606851549756 estimated_peak_memory_range: - min: 741376 - max: 28747328 + min: 36864 + max: 22062432 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 88 - layers_on_gpu: 1 + 
layers_on_npu: 86 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jp8qmyo8p + total_layers: 86 + job_id: jp0z1ov65 job_status: Passed torchscript_onnx_qnn: - inference_time: 1841.0 - throughput: 543.1830526887561 + inference_time: 1774.0 + throughput: 563.6978579481398 estimated_peak_memory_range: min: 12288 - max: 8282256 + max: 8352112 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jg9lynvvg + total_layers: 103 + job_id: j5wedy465 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,21 +244,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:08:06Z' + timestamp: '2024-11-09T22:54:49Z' - torchscript_onnx_tflite: - inference_time: 8314.0 - throughput: 120.27904738994467 + inference_time: 4945.0 + throughput: 202.22446916076845 estimated_peak_memory_range: - min: 749568 - max: 7149360 + min: 40960 + max: 7641664 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 88 - layers_on_gpu: 1 + layers_on_npu: 86 + layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgkeqxzog + total_layers: 86 + job_id: jp8q3j4xp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,36 +267,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:07:54Z' + timestamp: '2024-11-09T22:54:38Z' - torchscript_onnx_tflite: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 256.0 + throughput: 3906.25 estimated_peak_memory_range: min: 12288 - max: 1326224 + max: 1286744 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: j5q6rq8mp + total_layers: 86 + job_id: jgkel692g job_status: Passed torchscript_onnx_qnn: - inference_time: 316.0 - throughput: 3164.5569620253164 + 
inference_time: 301.0 + throughput: 3322.2591362126245 estimated_peak_memory_range: - min: 212992 - max: 1878624 + min: 217088 + max: 1354120 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp14wz0lp + total_layers: 103 + job_id: jg9l3odlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,36 +305,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:08:07Z' + timestamp: '2024-11-09T22:54:50Z' - torchscript_onnx_tflite: - inference_time: 477.0 - throughput: 2096.4360587002097 + inference_time: 255.0 + throughput: 3921.5686274509803 estimated_peak_memory_range: min: 12288 - max: 1386624 + max: 15045016 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jpedw1vv5 + total_layers: 86 + job_id: j5q674m4p job_status: Passed torchscript_onnx_qnn: - inference_time: 323.0 - throughput: 3095.9752321981423 + inference_time: 307.0 + throughput: 3257.328990228013 estimated_peak_memory_range: - min: 245760 - max: 1606128 + min: 217088 + max: 1849536 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jp14wl87p + total_layers: 103 + job_id: jgdxr62ep job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,36 +343,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:44:23Z' + timestamp: '2024-11-09T22:54:53Z' - torchscript_onnx_tflite: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 250.0 + throughput: 4000.0 estimated_peak_memory_range: - min: 12288 - max: 116057696 + min: 16384 + max: 1416288 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgz3j97x5 + total_layers: 86 + job_id: j56y3od0p job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: - min: 217088 - max: 1900888 + min: 212992 + max: 1808224 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgdxq9vzp + total_layers: 103 + job_id: j57yjo9l5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,36 +381,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:44:24Z' + timestamp: '2024-11-09T22:54:54Z' - torchscript_onnx_tflite: - inference_time: 480.0 - throughput: 2083.3333333333335 + inference_time: 244.0 + throughput: 4098.360655737705 estimated_peak_memory_range: min: 12288 - max: 1349960 + max: 1331312 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jg9ly148g + total_layers: 86 + job_id: jp3j4owlg job_status: Passed torchscript_onnx_qnn: - inference_time: 320.0 - throughput: 3125.0 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: min: 217088 - max: 1564704 + max: 1768400 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jg9ly14mg + total_layers: 103 + job_id: jp4lxe3v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,36 +419,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:44:26Z' + timestamp: '2024-11-09T22:54:55Z' - torchscript_onnx_tflite: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 703.0 + throughput: 1422.475106685633 
estimated_peak_memory_range: min: 12288 - max: 21726112 + max: 21135696 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: j5we3v9m5 + total_layers: 86 + job_id: jgo21d4xp job_status: Passed torchscript_onnx_qnn: - inference_time: 959.0 - throughput: 1042.752867570386 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: min: 200704 - max: 6141360 + max: 6136416 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: j5we3v945 + total_layers: 103 + job_id: jpxk70x15 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,36 +457,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:44:25Z' + timestamp: '2024-11-09T22:54:56Z' - torchscript_onnx_tflite: - inference_time: 686.0 - throughput: 1457.725947521866 + inference_time: 366.0 + throughput: 2732.24043715847 estimated_peak_memory_range: - min: 12288 - max: 45483600 + min: 16384 + max: 43220432 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 89 + layers_on_npu: 86 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 89 - job_id: jgo2n6ydp + total_layers: 86 + job_id: jpv6129j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 452.0 - throughput: 2212.3893805309735 + inference_time: 416.0 + throughput: 2403.846153846154 estimated_peak_memory_range: min: 196608 - max: 18409584 + max: 18329600 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgdxq19ep + total_layers: 103 + job_id: j5mnw98wp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,36 +495,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:08:12Z' + 
timestamp: '2024-11-09T22:54:57Z' - torchscript_onnx_qnn: - inference_time: 442.0 - throughput: 2262.443438914027 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 512000 - max: 512000 + min: 487424 + max: 487424 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 104 + layers_on_npu: 103 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 104 - job_id: jgdxq1wlp + total_layers: 103 + job_id: jp14do62p job_status: Passed torchscript_onnx: - inference_time: 663.0 - throughput: 1508.2956259426849 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 7270400 - max: 7270400 + min: 9203712 + max: 9203712 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 118 + layers_on_npu: 117 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 118 - job_id: jprv23q9g + total_layers: 117 + job_id: jpy148m7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:08:19Z' + timestamp: '2024-11-09T22:55:02Z' diff --git a/qai_hub_models/models/googlenet/README.md b/qai_hub_models/models/googlenet/README.md index fc59c1fd..76efc69a 100644 --- a/qai_hub_models/models/googlenet/README.md +++ b/qai_hub_models/models/googlenet/README.md @@ -5,8 +5,7 @@ GoogLeNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of GoogLeNet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/googlenet). 
diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index a0e4fab6..e159573c 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: GoogLeNet performance_metrics: - - torchscript_onnx_tflite: - inference_time: 1013.0 - throughput: 987.1668311944719 - estimated_peak_memory_range: - min: 45056 - max: 1535840 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 84 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 84 - job_id: jp4ldrzl5 - job_status: Passed - torchscript_onnx: - inference_time: 1160.0 - throughput: 862.0689655172414 - estimated_peak_memory_range: - min: 12288 - max: 1476808 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 145 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 145 - job_id: jg9lyn6vg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:07:20Z' - torchscript_onnx_tflite: inference_time: 1014.0 throughput: 986.1932938856016 estimated_peak_memory_range: - min: 28672 - max: 192016304 + min: 12288 + max: 195594704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jpxk6ow95 + job_id: jp4lxemv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1082.0 - throughput: 924.2144177449168 + inference_time: 1084.0 + throughput: 922.509225092251 estimated_peak_memory_range: - min: 12288 - max: 25079160 + min: 16384 + max: 24745544 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -106,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5q6rqjmp + job_id: jgkel6o2g job_status: Passed torchscript_onnx: - inference_time: 1146.0 - throughput: 872.6003490401396 + inference_time: 1145.0 + throughput: 873.3624454148471 estimated_peak_memory_range: - min: 622592 - max: 2306144 + min: 20480 + max: 185372008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp14wzrlp + job_id: j5wedy165 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:07:21Z' + timestamp: '2024-11-09T22:54:04Z' - torchscript_onnx_tflite: - inference_time: 744.0 - throughput: 1344.0860215053763 + inference_time: 636.0 + throughput: 1572.3270440251572 estimated_peak_memory_range: min: 16384 - max: 52536144 + max: 52244288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j5mn6xjqp + job_id: jpxk70315 job_status: Passed torchscript_onnx_qnn: - inference_time: 788.0 - throughput: 1269.0355329949239 + inference_time: 687.0 + throughput: 1455.604075691412 estimated_peak_memory_range: - min: 299008 - max: 14169888 + min: 0 + max: 16487808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jglv2mjl5 + job_id: j5q674z4p job_status: Passed torchscript_onnx: - inference_time: 891.0 - throughput: 1122.334455667789 + inference_time: 784.0 + throughput: 1275.5102040816328 estimated_peak_memory_range: min: 0 - max: 55351856 + max: 58581488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgdxq1jlp + job_id: jg9l3oxlg 
job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:07:22Z' + timestamp: '2024-11-09T22:54:05Z' - torchscript_onnx_tflite: - inference_time: 552.0 - throughput: 1811.5942028985507 + inference_time: 563.0 + throughput: 1776.1989342806394 estimated_peak_memory_range: - min: 8192 - max: 19847136 + min: 12288 + max: 19732448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp8qmyl8p + job_id: j5mnw9owp job_status: Passed torchscript_onnx_qnn: - inference_time: 723.0 - throughput: 1383.1258644536654 + inference_time: 724.0 + throughput: 1381.2154696132598 estimated_peak_memory_range: min: 614400 - max: 13441408 + max: 13258560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5we36jj5 + job_id: jglv0wo85 job_status: Passed torchscript_onnx: inference_time: 847.0 throughput: 1180.637544273908 estimated_peak_memory_range: - min: 0 - max: 20506432 + min: 114688 + max: 20682224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp4ldrql5 + job_id: jp14dov2p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:07:25Z' + timestamp: '2024-11-09T22:54:06Z' - torchscript_onnx_tflite: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 1012.0 + throughput: 988.1422924901186 estimated_peak_memory_range: - min: 28672 - max: 1458568 + min: 12288 + max: 9160328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - 
job_id: jgn6mvjm5 + job_id: jgn691or5 job_status: Passed torchscript_onnx_qnn: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 906.0 + throughput: 1103.7527593818984 estimated_peak_memory_range: - min: 630784 - max: 1877456 + min: 634880 + max: 2144024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j56yz4k7p + job_id: j56y3or0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:07:13Z' + timestamp: '2024-11-09T22:53:56Z' - torchscript_onnx_tflite: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 1016.0 + throughput: 984.2519685039371 estimated_peak_memory_range: - min: 20480 - max: 3626296 + min: 24576 + max: 2239280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp8qm9zop + job_id: jprv4xo9g job_status: Passed torchscript_onnx_qnn: - inference_time: 901.0 - throughput: 1109.8779134295228 + inference_time: 902.0 + throughput: 1108.6474501108648 estimated_peak_memory_range: - min: 634880 - max: 2001576 + min: 651264 + max: 1899112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jglv2zkm5 + job_id: jgo21doxp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:43:53Z' + timestamp: '2024-11-09T22:53:58Z' - torchscript_onnx_tflite: - inference_time: 1009.0 - throughput: 991.0802775024777 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 0 - max: 35905536 + min: 16384 + max: 2164032 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -326,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgkeqn3ng + job_id: jp2k7o44p job_status: Passed torchscript_onnx_qnn: - inference_time: 894.0 - throughput: 1118.5682326621925 + inference_time: 901.0 + throughput: 1109.8779134295228 estimated_peak_memory_range: - min: 626688 - max: 2139464 + min: 684032 + max: 2124832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j56yzj1yp + job_id: jpv612ej5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:43:54Z' + timestamp: '2024-11-09T22:53:59Z' - torchscript_onnx_tflite: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 1012.0 + throughput: 988.1422924901186 estimated_peak_memory_range: min: 20480 - max: 1506008 + max: 150493896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j5q6rk3op + job_id: jpy148q7p job_status: Passed torchscript_onnx_qnn: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 901.0 + throughput: 1109.8779134295228 estimated_peak_memory_range: - min: 651264 - max: 2298608 + min: 630784 + max: 1813320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp3j13mng + job_id: jgjv03oxg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:43:55Z' + timestamp: '2024-11-09T22:54:00Z' - torchscript_onnx_tflite: - inference_time: 1486.0 - throughput: 672.9475100942127 + inference_time: 2010.0 + throughput: 497.5124378109453 estimated_peak_memory_range: - min: 20480 - max: 
53593152 + min: 12288 + max: 18947952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 84 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 84 + job_id: jp0z1od65 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1977.0 + throughput: 505.8168942842691 + estimated_peak_memory_range: + min: 0 + max: 5951584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 143 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 143 + job_id: jpedr6815 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:54:01Z' + - torchscript_onnx_tflite: + inference_time: 1490.0 + throughput: 671.1409395973154 + estimated_peak_memory_range: + min: 16384 + max: 52817936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp0z20ne5 + job_id: jp8q3j6xp job_status: Passed torchscript_onnx_qnn: - inference_time: 1549.0 - throughput: 645.577792123951 + inference_time: 1560.0 + throughput: 641.025641025641 estimated_peak_memory_range: min: 618496 - max: 19981328 + max: 18275584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgz3jd165 + job_id: jgz3xz8k5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:07:18Z' + timestamp: '2024-11-09T22:54:03Z' - torchscript_onnx_qnn: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1064.0 + throughput: 939.8496240601504 estimated_peak_memory_range: min: 602112 max: 602112 @@ -440,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgo2n6jdp + job_id: jp3j4oxlg job_status: Passed 
torchscript_onnx: - inference_time: 1311.0 - throughput: 762.7765064836003 + inference_time: 1323.0 + throughput: 755.8578987150415 estimated_peak_memory_range: - min: 14544896 - max: 14544896 + min: 16084992 + max: 16084992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j57ylrzr5 + job_id: jgdxr6zep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:07:24Z' + timestamp: '2024-11-09T22:54:07Z' diff --git a/qai_hub_models/models/googlenet_quantized/README.md b/qai_hub_models/models/googlenet_quantized/README.md index bef9fc8f..747c6636 100644 --- a/qai_hub_models/models/googlenet_quantized/README.md +++ b/qai_hub_models/models/googlenet_quantized/README.md @@ -5,8 +5,7 @@ GoogLeNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of GoogLeNetQuantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/googlenet_quantized). 
diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index 017a5f49..3c073aea 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 282.0 - throughput: 3546.099290780142 + inference_time: 290.0 + throughput: 3448.2758620689656 estimated_peak_memory_range: - min: 12288 - max: 1579528 + min: 20480 + max: 113695720 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,67 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgn6ml3k5 + job_id: jg9l370mg job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 345.0 + throughput: 2898.550724637681 estimated_peak_memory_range: min: 16384 - max: 73276112 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 143 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 143 - job_id: jpv6r0wk5 - job_status: Passed - torchscript_onnx: - inference_time: 498.0 - throughput: 2008.0321285140562 - estimated_peak_memory_range: - min: 12288 - max: 10121064 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jgn6mlek5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:52:34Z' - - torchscript_onnx_tflite: - inference_time: 285.0 - throughput: 3508.7719298245615 - estimated_peak_memory_range: - min: 12288 - max: 1380368 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 86 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 86 - job_id: jprv28e0g - job_status: Passed - torchscript_onnx_qnn: - 
inference_time: 341.0 - throughput: 2932.551319648094 - estimated_peak_memory_range: - min: 12288 - max: 9946336 + max: 31645968 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgjv2zlvg - job_status: Passed - torchscript_onnx: - inference_time: 494.0 - throughput: 2024.2914979757086 - estimated_peak_memory_range: - min: 12288 - max: 9890760 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jprv28y0g + job_id: jp0z18305 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:52:35Z' + timestamp: '2024-11-09T23:48:06Z' - torchscript_onnx_tflite: - inference_time: 209.0 - throughput: 4784.688995215311 + inference_time: 206.0 + throughput: 4854.368932038835 estimated_peak_memory_range: min: 12288 - max: 39379760 + max: 39206512 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp2k90lrp + job_id: jp14dk2np job_status: Passed torchscript_onnx_qnn: - inference_time: 252.0 - throughput: 3968.253968253968 + inference_time: 253.0 + throughput: 3952.5691699604745 estimated_peak_memory_range: - min: 159744 - max: 14548624 + min: 0 + max: 17682800 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jpedwevo5 - job_status: Passed - torchscript_onnx: - inference_time: 369.0 - throughput: 2710.027100271003 - estimated_peak_memory_range: - min: 0 - max: 59953584 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jp2k90mrp + job_id: jp8q3d0qp job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:52:37Z' + timestamp: '2024-11-09T23:48:08Z' - torchscript_onnx_tflite: inference_time: 212.0 throughput: 4716.981132075472 estimated_peak_memory_range: min: 8192 - max: 20477056 + max: 20578192 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp3j1ve3g + job_id: jgdxryn6p job_status: Passed torchscript_onnx_qnn: - inference_time: 254.0 - throughput: 3937.0078740157483 + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: min: 0 - max: 10955152 + max: 11219760 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jpxk6n135 - job_status: Passed - torchscript_onnx: - inference_time: 403.0 - throughput: 2481.3895781637716 - estimated_peak_memory_range: - min: 0 - max: 27590224 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 91 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jp0z23r95 + job_id: jgkelw7vg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:52:41Z' + timestamp: '2024-11-09T23:48:10Z' - torchscript_onnx_tflite: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 929.0 + throughput: 1076.4262648008612 estimated_peak_memory_range: - min: 12288 - max: 21725264 + min: 36864 + max: 21549776 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jpy1jr68p + job_id: j57yj12n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1140.0 - throughput: 877.1929824561404 + 
inference_time: 1402.0 + throughput: 713.2667617689016 estimated_peak_memory_range: - min: 12288 - max: 7833008 + min: 172032 + max: 7935328 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgz3jo7o5 + job_id: j5q67xeep job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:52:17Z' + timestamp: '2024-11-09T23:47:51Z' - torchscript_onnx_tflite: - inference_time: 5779.0 - throughput: 173.04031839418585 + inference_time: 5704.0 + throughput: 175.3155680224404 estimated_peak_memory_range: - min: 12288 - max: 6788752 + min: 53248 + max: 2022760 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp0z23l95 + job_id: jp4lx6n25 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:51:58Z' + timestamp: '2024-11-09T23:47:34Z' - torchscript_onnx_tflite: - inference_time: 285.0 - throughput: 3508.7719298245615 + inference_time: 286.0 + throughput: 3496.5034965034965 estimated_peak_memory_range: min: 12288 - max: 113808288 + max: 1264440 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp8qm0zkp + job_id: jpxk78985 job_status: Passed torchscript_onnx_qnn: - inference_time: 315.0 - throughput: 3174.6031746031745 + inference_time: 303.0 + throughput: 3300.3300330033003 estimated_peak_memory_range: min: 176128 - max: 1393136 + max: 1344376 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5we32935 + job_id: jglv09625 job_status: Passed 
reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:52:19Z' + timestamp: '2024-11-09T23:47:53Z' - torchscript_onnx_tflite: - inference_time: 285.0 - throughput: 3508.7719298245615 + inference_time: 282.0 + throughput: 3546.099290780142 estimated_peak_memory_range: - min: 12288 - max: 1420448 + min: 20480 + max: 113810672 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jpy1j018p + job_id: j5mnw1e7p job_status: Passed torchscript_onnx_qnn: - inference_time: 300.0 - throughput: 3333.3333333333335 + inference_time: 307.0 + throughput: 3257.328990228013 estimated_peak_memory_range: - min: 184320 - max: 1358256 + min: 180224 + max: 1431056 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5q6ro6np + job_id: jp3j4lvmg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:08:04Z' + timestamp: '2024-11-09T23:47:57Z' - torchscript_onnx_tflite: - inference_time: 281.0 - throughput: 3558.7188612099644 + inference_time: 288.0 + throughput: 3472.222222222222 estimated_peak_memory_range: - min: 36864 - max: 80216920 + min: 12288 + max: 1963696 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp0z27z95 + job_id: jgn69d0j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 308.0 - throughput: 3246.753246753247 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: - min: 172032 - max: 1627160 + min: 176128 + max: 1494976 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 143 - job_id: jglv2rvj5 + job_id: jgo217k1p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:08:06Z' + timestamp: '2024-11-09T23:47:59Z' - torchscript_onnx_tflite: - inference_time: 279.0 - throughput: 3584.2293906810037 + inference_time: 284.0 + throughput: 3521.1267605633802 estimated_peak_memory_range: - min: 12288 - max: 25409704 + min: 16384 + max: 1372552 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgkeqmewg + job_id: jprv4m6kg job_status: Passed torchscript_onnx_qnn: - inference_time: 305.0 - throughput: 3278.688524590164 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: - min: 167936 - max: 1327576 + min: 180224 + max: 1453744 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp3j12j3g + job_id: jpv61y0z5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:08:09Z' + timestamp: '2024-11-09T23:48:01Z' - torchscript_onnx_tflite: inference_time: 654.0 throughput: 1529.051987767584 estimated_peak_memory_range: min: 12288 - max: 19908576 + max: 20022176 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp8qmvqkp + job_id: jp2k7qx6p job_status: Passed torchscript_onnx_qnn: - inference_time: 884.0 - throughput: 1131.2217194570135 + inference_time: 1004.0 + throughput: 996.01593625498 estimated_peak_memory_range: - min: 163840 - max: 6056208 + min: 0 + max: 5848896 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j56yzly6p + job_id: jgjv06z1g job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:08:07Z' + timestamp: '2024-11-09T23:48:02Z' - torchscript_onnx_tflite: - inference_time: 347.0 - throughput: 2881.844380403458 + inference_time: 341.0 + throughput: 2932.551319648094 estimated_peak_memory_range: - min: 12288 - max: 40490512 + min: 20480 + max: 39952928 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j56yzen6p + job_id: jpy14kz0p job_status: Passed torchscript_onnx_qnn: - inference_time: 404.0 - throughput: 2475.2475247524753 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 163840 - max: 18533392 + min: 159744 + max: 16881392 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp4ldkw85 + job_id: jpedr0e85 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:52:29Z' + timestamp: '2024-11-09T23:48:04Z' - torchscript_onnx_qnn: - inference_time: 436.0 - throughput: 2293.577981651376 + inference_time: 409.0 + throughput: 2444.987775061125 estimated_peak_memory_range: - min: 606208 - max: 606208 + min: 487424 + max: 487424 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jg9lyj4wg + job_id: j56y39enp job_status: Passed torchscript_onnx: - inference_time: 559.0 - throughput: 1788.9087656529516 + inference_time: 149585.0 + throughput: 6.685162282314403 estimated_peak_memory_range: - min: 9601024 - max: 9601024 + min: 60530688 + max: 60530688 
primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 91 + layers_on_npu: 319 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 91 - job_id: jpy1jrd8p + layers_on_cpu: 57 + total_layers: 376 + job_id: jp14dkynp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +488,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:52:39Z' + timestamp: '2024-11-09T23:48:11Z' diff --git a/qai_hub_models/models/hrnet_pose/README.md b/qai_hub_models/models/hrnet_pose/README.md index 6c41c41d..15d24853 100644 --- a/qai_hub_models/models/hrnet_pose/README.md +++ b/qai_hub_models/models/hrnet_pose/README.md @@ -5,8 +5,7 @@ HRNet performs pose estimation in high-resolution representations. -This is based on the implementation of HRNetPose found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/hrnet_pose). 
diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index 72b294ca..d9577dc9 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: HRNetPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 2845.0 - throughput: 351.493848857645 + inference_time: 2811.0 + throughput: 355.7452863749555 estimated_peak_memory_range: - min: 32768 - max: 2412448 + min: 28672 + max: 2054824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jg9lyn7vg + job_id: jg9l3o8lg job_status: Passed torchscript_onnx_qnn: - inference_time: 2969.0 - throughput: 336.81374200067364 + inference_time: 2954.0 + throughput: 338.52403520649966 estimated_peak_memory_range: min: 606208 - max: 12489064 + max: 17591800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp2k9y1mp + job_id: jp2k7oj4p job_status: Passed torchscript_onnx: - inference_time: 2932.0 - throughput: 341.06412005457025 + inference_time: 2892.0 + throughput: 345.78146611341634 estimated_peak_memory_range: - min: 16384 - max: 60450848 + min: 40960 + max: 60420440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jpv6rk8m5 + job_id: jpv6124j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:06:37Z' + timestamp: 
'2024-11-09T22:53:21Z' - torchscript_onnx_tflite: - inference_time: 2841.0 - throughput: 351.98873636043646 + inference_time: 2082.0 + throughput: 480.3073967339097 estimated_peak_memory_range: min: 16384 - max: 2169864 + max: 129649616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jp14wzklp + job_id: jp14do32p job_status: Passed torchscript_onnx_qnn: - inference_time: 2958.0 - throughput: 338.0662609871535 + inference_time: 2144.0 + throughput: 466.4179104477612 estimated_peak_memory_range: min: 606208 - max: 14097040 + max: 36657152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,67 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jpy1j3l4p + job_id: jpy148n7p job_status: Passed torchscript_onnx: - inference_time: 2965.0 - throughput: 337.2681281618887 - estimated_peak_memory_range: - min: 20480 - max: 60746584 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 749 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 749 - job_id: jgjv2n98g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:06:38Z' - - torchscript_onnx_tflite: - inference_time: 2310.0 - throughput: 432.9004329004329 - estimated_peak_memory_range: - min: 16384 - max: 127359152 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 516 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 516 - job_id: jgdxq1ylp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2382.0 - throughput: 419.81528127623847 + inference_time: 2167.0 + throughput: 461.4674665436087 estimated_peak_memory_range: min: 606208 - max: 34781072 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 747 - layers_on_gpu: 0 - layers_on_cpu: 0 - 
total_layers: 747 - job_id: jp0z20we5 - job_status: Passed - torchscript_onnx: - inference_time: 2573.0 - throughput: 388.65137971239795 - estimated_peak_memory_range: - min: 0 - max: 155684848 + max: 157655872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jpedwmq05 + job_id: jgjv031xg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:06:39Z' + timestamp: '2024-11-09T22:53:22Z' - torchscript_onnx_tflite: - inference_time: 1974.0 - throughput: 506.5856129685917 + inference_time: 1969.0 + throughput: 507.87201625190454 estimated_peak_memory_range: min: 12288 - max: 61896176 + max: 61493808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jprv23reg + job_id: jgdxr60ep job_status: Passed torchscript_onnx_qnn: - inference_time: 2095.0 - throughput: 477.326968973747 + inference_time: 2110.0 + throughput: 473.93364928909955 estimated_peak_memory_range: - min: 602112 - max: 34796352 + min: 0 + max: 33778608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jgo2n6xdp + job_id: jp0z1ok65 job_status: Passed torchscript_onnx: - inference_time: 2142.0 - throughput: 466.8534080298786 + inference_time: 2160.0 + throughput: 462.962962962963 estimated_peak_memory_range: min: 0 - max: 74814144 + max: 74612448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: j5we36kj5 + job_id: jpedr6215 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - 
timestamp: '2024-10-27T00:06:41Z' + timestamp: '2024-11-09T22:53:23Z' - torchscript_onnx_tflite: - inference_time: 2807.0 - throughput: 356.2522265764161 + inference_time: 2814.0 + throughput: 355.36602700781805 estimated_peak_memory_range: - min: 28672 - max: 2417944 + min: 16384 + max: 1847360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j57ylrmr5 + job_id: j57yjo6l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2706.0 - throughput: 369.5491500369549 + inference_time: 2714.0 + throughput: 368.45983787767136 estimated_peak_memory_range: - min: 630784 - max: 1839904 + min: 622592 + max: 1869912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp8qmyn8p + job_id: jp8q3j8xp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:06:29Z' + timestamp: '2024-11-09T22:53:13Z' - torchscript_onnx_tflite: - inference_time: 2801.0 - throughput: 357.0153516601214 + inference_time: 2841.0 + throughput: 351.98873636043646 estimated_peak_memory_range: - min: 16384 - max: 2392456 + min: 401408 + max: 2620352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgdxq9ozp + job_id: jp4lxe8v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2760.0 - throughput: 362.3188405797101 + inference_time: 2754.0 + throughput: 363.10820624546113 estimated_peak_memory_range: - min: 24576 - max: 1188584 + min: 618496 + max: 1964832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jpxk6jyl5 + job_id: j5q674w4p job_status: Passed reference_device_info: name: SA8255 
(Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:43:33Z' + timestamp: '2024-11-09T22:53:15Z' - torchscript_onnx_tflite: - inference_time: 2853.0 - throughput: 350.5082369435682 + inference_time: 2806.0 + throughput: 356.3791874554526 estimated_peak_memory_range: - min: 16384 - max: 2267776 + min: 225280 + max: 2333592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j57ylwx95 + job_id: jpxk70m15 job_status: Passed torchscript_onnx_qnn: - inference_time: 2725.0 - throughput: 366.9724770642202 + inference_time: 2752.0 + throughput: 363.3720930232558 estimated_peak_memory_range: min: 630784 - max: 2039504 + max: 2286736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j5mn6239p + job_id: jglv0w785 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:43:34Z' + timestamp: '2024-11-09T22:53:16Z' - torchscript_onnx_tflite: - inference_time: 2805.0 - throughput: 356.50623885918003 + inference_time: 2813.0 + throughput: 355.49235691432636 estimated_peak_memory_range: min: 28672 - max: 2780936 + max: 2476864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jp4ldov15 + job_id: j5mnw94wp job_status: Passed torchscript_onnx_qnn: - inference_time: 2716.0 - throughput: 368.1885125184094 + inference_time: 2744.0 + throughput: 364.4314868804665 estimated_peak_memory_range: min: 626688 - max: 2003528 + max: 1958624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jgn6my3q5 + job_id: j56y3ov0p 
job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:43:35Z' + timestamp: '2024-11-09T22:53:18Z' - torchscript_onnx_tflite: - inference_time: 3736.0 - throughput: 267.6659528907923 + inference_time: 4653.0 + throughput: 214.9151085321298 estimated_peak_memory_range: - min: 53248 - max: 114071408 + min: 45056 + max: 53135680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +366,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgn6mv4m5 + job_id: jgn691xr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3879.0 - throughput: 257.79840164990975 + inference_time: 4680.0 + throughput: 213.67521367521368 estimated_peak_memory_range: - min: 606208 - max: 31357168 + min: 659456 + max: 6541056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 747 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 747 + job_id: jp3j4o8lg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:53:19Z' + - torchscript_onnx_tflite: + inference_time: 3726.0 + throughput: 268.3843263553409 + estimated_peak_memory_range: + min: 36864 + max: 115130160 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jprv4x99g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3921.0 + throughput: 255.03698036215252 + estimated_peak_memory_range: + min: 0 + max: 31168544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp3j10dzg + job_id: jgo21dmxp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: 
Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:06:35Z' + timestamp: '2024-11-09T22:53:20Z' - torchscript_onnx_qnn: - inference_time: 3002.0 - throughput: 333.11125916055965 + inference_time: 2992.0 + throughput: 334.2245989304813 estimated_peak_memory_range: min: 589824 max: 589824 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jgkeqx1og + job_id: jgkel6d2g job_status: Passed torchscript_onnx: - inference_time: 2966.0 - throughput: 337.1544167228591 + inference_time: 2980.0 + throughput: 335.5704697986577 estimated_peak_memory_range: - min: 59588608 - max: 59588608 + min: 59523072 + max: 59523072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jgz3jd665 + job_id: jgz3xzwk5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:06:40Z' + timestamp: '2024-11-09T22:53:24Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/README.md b/qai_hub_models/models/hrnet_pose_quantized/README.md index 03707c36..8c91680e 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/README.md +++ b/qai_hub_models/models/hrnet_pose_quantized/README.md @@ -5,8 +5,7 @@ HRNet performs pose estimation in high-resolution representations. -This is based on the implementation of HRNetPoseQuantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/hrnet_pose_quantized). 
diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml index 971b830f..eb0b8971 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml +++ b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml @@ -46,34 +46,11 @@ models: - name: HRNetPoseQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 952.0 - throughput: 1050.420168067227 - estimated_peak_memory_range: - min: 16384 - max: 61655920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 518 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 518 - job_id: jgjv2n87g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:05:31Z' - - torchscript_onnx_tflite: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 963.0 + throughput: 1038.4215991692627 estimated_peak_memory_range: - min: 0 - max: 2300600 + min: 12288 + max: 1998776 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jpedwmn75 + job_id: jgz3xzx65 job_status: Passed torchscript_onnx_qnn: - inference_time: 1245.0 - throughput: 803.2128514056225 + inference_time: 1241.0 + throughput: 805.8017727639001 estimated_peak_memory_range: - min: 16384 - max: 11183544 + min: 12288 + max: 16187088 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jpxk6o895 + job_id: jp2k7o3mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:05:32Z' + timestamp: '2024-11-09T22:52:16Z' - torchscript_onnx_tflite: - inference_time: 704.0 - throughput: 1420.4545454545455 + 
inference_time: 708.0 + throughput: 1412.4293785310736 estimated_peak_memory_range: min: 12288 - max: 112509680 + max: 111092256 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jgz3jd0z5 + job_id: j5wedydj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 913.0 - throughput: 1095.290251916758 + inference_time: 895.0 + throughput: 1117.31843575419 estimated_peak_memory_range: - min: 0 - max: 34553104 + min: 163840 + max: 35919888 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,7 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: j5mn6x1qp + job_id: jpy148v4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -143,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:05:34Z' + timestamp: '2024-11-09T22:52:17Z' - torchscript_onnx_tflite: - inference_time: 573.0 - throughput: 1745.2006980802792 + inference_time: 574.0 + throughput: 1742.1602787456445 estimated_peak_memory_range: min: 8192 - max: 66920640 + max: 66758928 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jgdxq1mlp + job_id: jg9l3o3vg job_status: Passed torchscript_onnx_qnn: - inference_time: 846.0 - throughput: 1182.033096926714 + inference_time: 749.0 + throughput: 1335.1134846461948 estimated_peak_memory_range: min: 0 - max: 32347776 + max: 34344864 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: j5q6rqxmp + job_id: jp0z1oee5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -181,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:05:43Z' + timestamp: '2024-11-09T22:52:18Z' - 
torchscript_onnx_tflite: - inference_time: 3962.0 - throughput: 252.39777889954567 + inference_time: 3983.0 + throughput: 251.06703489831784 estimated_peak_memory_range: - min: 77824 - max: 70944272 + min: 262144 + max: 70917920 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: j5we36rz5 + job_id: jp14dodlp job_status: Passed torchscript_onnx_qnn: - inference_time: 5512.0 - throughput: 181.42235123367198 + inference_time: 5337.0 + throughput: 187.37118231216039 estimated_peak_memory_range: min: 163840 - max: 6977968 + max: 8369248 primary_compute_unit: NPU precision: int8 layer_info: @@ -210,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgn6mvdm5 + job_id: jp8q3jw8p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -219,13 +196,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:05:35Z' + timestamp: '2024-11-09T22:52:20Z' - torchscript_onnx_tflite: - inference_time: 17182.0 - throughput: 58.200442323361656 + inference_time: 17077.0 + throughput: 58.55829478245594 estimated_peak_memory_range: - min: 1318912 - max: 11155792 + min: 65536 + max: 3713552 primary_compute_unit: NPU precision: int8 layer_info: @@ -233,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jg9lynqqg + job_id: jgdxr6rlp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -242,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:05:23Z' + timestamp: '2024-11-09T22:52:09Z' - torchscript_onnx_tflite: - inference_time: 949.0 - throughput: 1053.740779768177 + inference_time: 948.0 + throughput: 1054.8523206751054 estimated_peak_memory_range: - min: 24576 - max: 202079664 + min: 12288 + max: 3420456 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,14 +233,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 518 - job_id: jp14wzmkp + job_id: j57yjovr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1203.0 - throughput: 831.255195344971 + inference_time: 1201.0 + throughput: 832.6394671107411 estimated_peak_memory_range: min: 180224 - max: 1316960 + max: 1289600 primary_compute_unit: NPU precision: int8 layer_info: @@ -271,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jprv23meg + job_id: jgkel6rog job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -280,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:05:36Z' + timestamp: '2024-11-09T22:52:21Z' - torchscript_onnx_tflite: - inference_time: 951.0 - throughput: 1051.5247108307046 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: - min: 73728 - max: 198315200 + min: 16384 + max: 2587072 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,14 +271,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jglv2z3m5 + job_id: jp4lxejl5 job_status: Passed torchscript_onnx_qnn: inference_time: 1217.0 throughput: 821.6926869350863 estimated_peak_memory_range: - min: 184320 - max: 1420904 + min: 172032 + max: 1970072 primary_compute_unit: NPU precision: int8 layer_info: @@ -309,7 +286,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jpv6rovr5 + job_id: jglv0wel5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -318,13 +295,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:43:03Z' + timestamp: '2024-11-09T22:52:23Z' - torchscript_onnx_tflite: - inference_time: 963.0 - throughput: 1038.4215991692627 + inference_time: 960.0 + throughput: 1041.6666666666667 estimated_peak_memory_range: - min: 69632 - max: 214578152 + min: 20480 + max: 2459680 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,14 +309,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: j56yzjnyp + job_id: jpxk70e95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1212.0 - throughput: 825.0825082508251 + inference_time: 1209.0 + throughput: 827.129859387924 estimated_peak_memory_range: - min: 180224 - max: 1865072 + min: 172032 + max: 2046520 primary_compute_unit: NPU precision: int8 layer_info: @@ -347,7 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgjv2meeg + job_id: j56y3oq7p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -356,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:43:04Z' + timestamp: '2024-11-09T22:52:24Z' - torchscript_onnx_tflite: - inference_time: 951.0 - throughput: 1051.5247108307046 + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: - min: 36864 - max: 1635080 + min: 12288 + max: 4136080 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jgo2n03kp + job_id: j5mnw9vqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1222.0 - throughput: 818.3306055646481 + inference_time: 1217.0 + throughput: 821.6926869350863 estimated_peak_memory_range: - min: 212992 - max: 1407656 + min: 192512 + max: 1467720 primary_compute_unit: NPU precision: int8 layer_info: @@ -385,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgz3j9rx5 + job_id: jgo21dedp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -394,13 +371,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:43:06Z' - - torchscript_onnx_qnn: - inference_time: 2661.0 - throughput: 375.7985719654265 + timestamp: '2024-11-09T22:52:26Z' + - torchscript_onnx_tflite: + inference_time: 1656.0 + throughput: 603.864734299517 + estimated_peak_memory_range: + min: 
28672 + max: 66518352 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 518 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 518 + job_id: jgn691rm5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2029.0 + throughput: 492.8536224741252 estimated_peak_memory_range: min: 0 - max: 5805696 + max: 5721296 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jpedw1kv5 + job_id: jpv612zm5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -417,13 +409,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:43:05Z' + timestamp: '2024-11-09T22:52:27Z' - torchscript_onnx_tflite: - inference_time: 1152.0 - throughput: 868.0555555555555 + inference_time: 1174.0 + throughput: 851.7887563884157 estimated_peak_memory_range: - min: 0 - max: 115386672 + min: 12288 + max: 114529504 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +423,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jp14wzmlp + job_id: jprv4x1eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1457.0 - throughput: 686.3417982155113 + inference_time: 1459.0 + throughput: 685.4009595613434 estimated_peak_memory_range: - min: 167936 - max: 38644656 + min: 163840 + max: 37869440 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgkeqxwog + job_id: jgjv03k8g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -455,13 +447,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:05:42Z' + timestamp: '2024-11-09T22:52:28Z' - torchscript_onnx_qnn: - inference_time: 1340.0 - throughput: 746.2686567164179 + inference_time: 1414.0 + throughput: 707.2135785007072 estimated_peak_memory_range: - min: 339968 - max: 339968 + min: 319488 
+ max: 319488 primary_compute_unit: NPU precision: int8 layer_info: @@ -469,7 +461,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jp2k9yqmp + job_id: j5q6749mp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -478,4 +470,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:05:37Z' + timestamp: '2024-11-09T22:52:22Z' diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md index 6670fbde..37c3e68c 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md @@ -5,8 +5,7 @@ HuggingFaceWavLMBasePlus is a real time speech processing backbone based on Microsoft's WavLM model. -This is based on the implementation of HuggingFace-WavLM-Base-Plus found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus). 
diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 3796b3e7..3b7ef054 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -201,12 +201,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, - supports_qnn=False, - supports_onnx=False, - supports_precompiled_qnn_onnx=False, - ) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index b378e2cc..fce62960 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 853883.0 - throughput: 1.1711206336231077 + inference_time: 814795.0 + throughput: 1.2273025730398444 estimated_peak_memory_range: - min: 65822720 - max: 101117368 + min: 65556480 + max: 102684104 primary_compute_unit: CPU precision: fp32 layer_info: @@ -53,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: j5we36zz5 + job_id: jgdxr6vlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -62,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:02:52Z' + timestamp: '2024-11-09T22:49:34Z' - torchscript_onnx_tflite: - inference_time: 
933149.0 - throughput: 1.071640220372095 + inference_time: 739112.0 + throughput: 1.3529749212568596 estimated_peak_memory_range: - min: 66416640 - max: 102764232 + min: 31469568 + max: 52509824 primary_compute_unit: CPU precision: fp32 layer_info: @@ -76,30 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jg9lyn2qg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:02:53Z' - - torchscript_onnx_tflite: - inference_time: 631934.0 - throughput: 1.5824437362129589 - estimated_peak_memory_range: - min: 65609728 - max: 87473456 - primary_compute_unit: CPU - precision: fp32 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 871 - total_layers: 871 - job_id: jp14wz1kp + job_id: j57yjojr5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -108,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:02:55Z' + timestamp: '2024-11-09T22:49:35Z' - torchscript_onnx_tflite: - inference_time: 530348.0 - throughput: 1.8855543907019543 + inference_time: 530552.0 + throughput: 1.8848293852440476 estimated_peak_memory_range: - min: 65978368 - max: 81640576 + min: 65560576 + max: 81155600 primary_compute_unit: CPU precision: fp32 layer_info: @@ -122,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jgn6mvwv5 + job_id: jp4lxexl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -131,13 +110,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:03:01Z' + timestamp: '2024-11-09T22:49:36Z' - torchscript_onnx_tflite: - inference_time: 871115.0 - throughput: 1.1479540588785637 + inference_time: 742901.0 + throughput: 1.3460743759935712 estimated_peak_memory_range: - min: 65560576 - max: 68293552 + 
min: 65892352 + max: 68934792 primary_compute_unit: CPU precision: fp32 layer_info: @@ -145,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jgdxq14kp + job_id: jpxk70795 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:02:56Z' + timestamp: '2024-11-09T22:49:37Z' - torchscript_onnx_tflite: - inference_time: 870475.0 - throughput: 1.1487980700192424 + inference_time: 927766.0 + throughput: 1.0778579943649584 estimated_peak_memory_range: - min: 65998848 - max: 139985248 + min: 65581056 + max: 639215792 primary_compute_unit: CPU precision: fp32 layer_info: @@ -168,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jp2k969qp + job_id: j5mnw9wqp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -177,13 +156,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:40:56Z' + timestamp: '2024-11-09T22:49:38Z' - torchscript_onnx_tflite: - inference_time: 881797.0 - throughput: 1.1340478590877492 + inference_time: 919003.0 + throughput: 1.0881357296983796 estimated_peak_memory_range: - min: 65576960 - max: 101009280 + min: 65712128 + max: 105308752 primary_compute_unit: CPU precision: fp32 layer_info: @@ -191,7 +170,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jpy1jwjlp + job_id: jgn6919m5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -200,13 +179,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:40:56Z' + timestamp: '2024-11-09T22:49:39Z' - torchscript_onnx_tflite: - inference_time: 927864.0 - throughput: 1.0777441521602305 + inference_time: 887749.0 + throughput: 1.1264445242968453 estimated_peak_memory_range: - min: 71507968 - max: 373592224 + min: 62672896 + max: 66038360 primary_compute_unit: CPU 
precision: fp32 layer_info: @@ -214,7 +193,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: jp0z2q2n5 + job_id: jprv4x4eg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -223,13 +202,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:40:57Z' + timestamp: '2024-11-09T22:49:40Z' + - torchscript_onnx_tflite: + inference_time: 1027812.0 + throughput: 0.9729405766813386 + estimated_peak_memory_range: + min: 65581056 + max: 86060080 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 871 + total_layers: 871 + job_id: jp2k7o7mp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:49:41Z' - torchscript_onnx_tflite: - inference_time: 1018950.0 - throughput: 0.9814024240639875 + inference_time: 1159685.0 + throughput: 0.8623031254176781 estimated_peak_memory_range: - min: 66109440 - max: 93138592 + min: 31010816 + max: 59631232 primary_compute_unit: CPU precision: fp32 layer_info: @@ -237,7 +239,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 871 total_layers: 871 - job_id: j5mn6xkyp + job_id: jpy14844p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -246,4 +248,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:03:00Z' + timestamp: '2024-11-09T22:49:42Z' diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt b/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt index 71ec7dee..5789f7f6 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/requirements.txt @@ -1,4 +1,3 @@ transformers==4.41.1 soundfile==0.12.1 librosa==0.10.1 -datasets==2.14.5 diff --git 
a/qai_hub_models/models/ibm_granite_3b_code_instruct/README.md b/qai_hub_models/models/ibm_granite_3b_code_instruct/README.md index 4bc8ef0d..c5036921 100644 --- a/qai_hub_models/models/ibm_granite_3b_code_instruct/README.md +++ b/qai_hub_models/models/ibm_granite_3b_code_instruct/README.md @@ -5,8 +5,7 @@ Granite-3B-Code-Instruct-2K is a 3B parameter model fine tuned from Granite-3B-Code-Base-2K on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills. -This is based on the implementation of IBM-Granite-3B-Code-Instruct found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/ibm_granite_3b_code_instruct). diff --git a/qai_hub_models/models/inception_v3/README.md b/qai_hub_models/models/inception_v3/README.md index deeeab5e..b38829f8 100644 --- a/qai_hub_models/models/inception_v3/README.md +++ b/qai_hub_models/models/inception_v3/README.md @@ -5,8 +5,7 @@ InceptionNetV3 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Inception-v3 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/inception_v3). 
diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index c94e6afa..47e83fca 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,70 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Inception-v3 performance_metrics: - - torchscript_onnx_tflite: - inference_time: 1329.0 - throughput: 752.4454477050414 - estimated_peak_memory_range: - min: 32768 - max: 2042600 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 129 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 129 - job_id: jpv6rk275 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1429.0 - throughput: 699.7900629811056 - estimated_peak_memory_range: - min: 536576 - max: 148893576 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 219 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 219 - job_id: jp4ldreq5 - job_status: Passed - torchscript_onnx: - inference_time: 1776.0 - throughput: 563.063063063063 - estimated_peak_memory_range: - min: 32768 - max: 51741496 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 221 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 221 - job_id: j5q6rq17p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:02:23Z' - torchscript_onnx_tflite: inference_time: 1330.0 throughput: 751.8796992481203 estimated_peak_memory_range: - min: 12288 - max: 2037632 + min: 16384 + max: 1875448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +55,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 129 - job_id: jgjv2n37g + job_id: jgz3xzr65 job_status: Passed torchscript_onnx_qnn: - inference_time: 1399.0 - throughput: 714.7962830593281 + inference_time: 1411.0 + throughput: 708.7172218284904 estimated_peak_memory_range: - min: 581632 - max: 148724504 + min: 516096 + max: 148301040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpxk6o0j5 + job_id: jgn691em5 job_status: Passed torchscript_onnx: - inference_time: 1733.0 - throughput: 577.0340450086555 + inference_time: 1761.0 + throughput: 567.8591709256104 estimated_peak_memory_range: - min: 12288 - max: 51635176 + min: 16384 + max: 52420320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jglv2m8e5 + job_id: jp3j4omzg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:02:24Z' + timestamp: '2024-11-09T22:49:05Z' - torchscript_onnx_tflite: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 999.0 + throughput: 1001.001001001001 estimated_peak_memory_range: - min: 16384 - max: 60548688 + min: 0 + max: 61224752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jpedwm675 + job_id: j5wedyqj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1049.0 + throughput: 953.2888465204957 estimated_peak_memory_range: - min: 618496 - max: 19000160 + min: 0 + max: 18818592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5mn6x9yp + job_id: jprv4xyeg job_status: Passed torchscript_onnx: - 
inference_time: 1775.0 - throughput: 563.3802816901408 + inference_time: 1289.0 + throughput: 775.7951900698216 estimated_peak_memory_range: min: 0 - max: 59764160 + max: 60205856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j56yz4mvp + job_id: jgo21dvdp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:02:25Z' + timestamp: '2024-11-09T22:49:06Z' - torchscript_onnx_tflite: - inference_time: 947.0 - throughput: 1055.9662090813094 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: min: 12288 - max: 23637376 + max: 23316848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j57ylroq5 + job_id: j5wedjy65 job_status: Passed torchscript_onnx_qnn: - inference_time: 881.0 - throughput: 1135.0737797956867 + inference_time: 1049.0 + throughput: 953.2888465204957 estimated_peak_memory_range: - min: 614400 - max: 16193760 + min: 0 + max: 15250208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgkeqx0yg + job_id: jp2k7ommp job_status: Passed torchscript_onnx: - inference_time: 1258.0 - throughput: 794.912559618442 + inference_time: 1268.0 + throughput: 788.6435331230284 estimated_peak_memory_range: min: 0 - max: 25989504 + max: 25495280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgo2n6w4p + job_id: jpv612wm5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:02:27Z' + 
timestamp: '2024-11-11T13:56:29Z' - torchscript_onnx_tflite: inference_time: 1328.0 throughput: 753.0120481927711 estimated_peak_memory_range: - min: 12288 - max: 2094096 + min: 16384 + max: 2259528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgz3jdzz5 + job_id: jp14doelp job_status: Passed torchscript_onnx_qnn: - inference_time: 1454.0 - throughput: 687.757909215956 + inference_time: 1451.0 + throughput: 689.1798759476223 estimated_peak_memory_range: - min: 634880 - max: 1879640 + min: 630784 + max: 1938496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgn6mvqv5 + job_id: jpy148d4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:02:15Z' + timestamp: '2024-11-09T22:48:57Z' - torchscript_onnx_tflite: - inference_time: 1337.0 - throughput: 747.9431563201197 + inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: min: 24576 - max: 2340192 + max: 2270840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j5we3v3m5 + job_id: jgdxr6olp job_status: Passed torchscript_onnx_qnn: inference_time: 1457.0 throughput: 686.3417982155113 estimated_peak_memory_range: - min: 647168 - max: 1945680 + min: 16384 + max: 1512504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgdxq9qzp + job_id: jp8q3j78p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:40:29Z' + timestamp: '2024-11-09T22:48:59Z' - 
torchscript_onnx_tflite: - inference_time: 1326.0 - throughput: 754.1478129713424 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 28672 - max: 2290792 + min: 147456 + max: 2078784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jg9ly1y8g + job_id: j57yjodr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1467.0 - throughput: 681.6632583503749 + inference_time: 1465.0 + throughput: 682.5938566552901 estimated_peak_memory_range: - min: 634880 - max: 2145408 + min: 704512 + max: 2020320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j57ylwl95 + job_id: jgkel6yog job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:40:30Z' + timestamp: '2024-11-09T22:49:00Z' - torchscript_onnx_tflite: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: min: 16384 - max: 3066504 + max: 2039552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp14wlw7p + job_id: jp4lxewl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1450.0 - throughput: 689.6551724137931 + inference_time: 1463.0 + throughput: 683.526999316473 estimated_peak_memory_range: - min: 638976 - max: 1787864 + min: 634880 + max: 1796448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp4ldod15 + job_id: j5q6742mp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P 
Proxy - timestamp: '2024-10-30T01:40:31Z' + timestamp: '2024-11-09T22:49:02Z' - torchscript_onnx_tflite: - inference_time: 2106.0 - throughput: 474.8338081671415 + inference_time: 2552.0 + throughput: 391.84952978056424 estimated_peak_memory_range: min: 16384 - max: 62277840 + max: 22793200 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 129 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 129 + job_id: jpxk70195 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2755.0 + throughput: 362.9764065335753 + estimated_peak_memory_range: + min: 0 + max: 5927600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 219 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 219 + job_id: jglv0wkl5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:49:03Z' + - torchscript_onnx_tflite: + inference_time: 2103.0 + throughput: 475.51117451260103 + estimated_peak_memory_range: + min: 20480 + max: 59131056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgdxq16kp + job_id: j5mnw9zqp job_status: Passed torchscript_onnx_qnn: - inference_time: 2179.0 - throughput: 458.9261128958238 + inference_time: 2183.0 + throughput: 458.0852038479157 estimated_peak_memory_range: - min: 618496 - max: 23970704 + min: 393216 + max: 19995824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp8qmyrzp + job_id: j56y3o17p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:02:21Z' + timestamp: '2024-11-09T22:49:04Z' - torchscript_onnx_qnn: - inference_time: 1477.0 - throughput: 
677.0480704129993 + inference_time: 1501.0 + throughput: 666.2225183211193 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jprv23dvg + job_id: jp0z1ore5 job_status: Passed torchscript_onnx: - inference_time: 1685.0 - throughput: 593.4718100890208 + inference_time: 1659.0 + throughput: 602.7727546714889 estimated_peak_memory_range: - min: 48971776 - max: 48971776 + min: 50130944 + max: 50130944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp3j107xg + job_id: jgjv03l8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:02:26Z' + timestamp: '2024-11-09T22:49:08Z' diff --git a/qai_hub_models/models/inception_v3_quantized/README.md b/qai_hub_models/models/inception_v3_quantized/README.md index 84df8a1b..b04e2249 100644 --- a/qai_hub_models/models/inception_v3_quantized/README.md +++ b/qai_hub_models/models/inception_v3_quantized/README.md @@ -5,8 +5,7 @@ InceptionNetV3 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. This model is post-training quantized to int8 using samples from Google's open images dataset. -This is based on the implementation of Inception-v3-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/inception_v3_quantized). 
diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index 8280a550..04d16bc6 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 593.0 - throughput: 1686.3406408094436 + inference_time: 594.0 + throughput: 1683.5016835016836 estimated_peak_memory_range: - min: 57344 - max: 27939864 + min: 24576 + max: 20241240 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,67 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgdxqeqep + job_id: jp3j4ljmg job_status: Passed torchscript_onnx_qnn: inference_time: 650.0 throughput: 1538.4615384615386 - estimated_peak_memory_range: - min: 16384 - max: 6564176 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 219 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 219 - job_id: jpy1jrj8p - job_status: Passed - torchscript_onnx: - inference_time: 884.0 - throughput: 1131.2217194570135 estimated_peak_memory_range: min: 12288 - max: 31242976 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 130 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 130 - job_id: jgz3joro5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:51:24Z' - - torchscript_onnx_tflite: - inference_time: 586.0 - throughput: 1706.4846416382252 - estimated_peak_memory_range: - min: 16384 - max: 1454872 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 142 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 142 - job_id: j5we32335 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 648.0 - throughput: 
1543.20987654321 - estimated_peak_memory_range: - min: 24576 - max: 28478376 + max: 105857288 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp0z23295 - job_status: Passed - torchscript_onnx: - inference_time: 902.0 - throughput: 1108.6474501108648 - estimated_peak_memory_range: - min: 12288 - max: 31247168 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 130 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 130 - job_id: j5we32q35 + job_id: jp4lx6l25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:51:26Z' + timestamp: '2024-11-09T23:46:58Z' - torchscript_onnx_tflite: - inference_time: 444.0 - throughput: 2252.252252252252 + inference_time: 449.0 + throughput: 2227.1714922048996 estimated_peak_memory_range: - min: 20480 - max: 74037968 + min: 12288 + max: 73828912 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jg9lyjywg + job_id: jgo21721p job_status: Passed torchscript_onnx_qnn: - inference_time: 497.0 - throughput: 2012.0724346076458 + inference_time: 504.0 + throughput: 1984.126984126984 estimated_peak_memory_range: min: 167936 - max: 19297312 + max: 18484352 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp8qm0mkp - job_status: Passed - torchscript_onnx: - inference_time: 688.0 - throughput: 1453.4883720930231 - estimated_peak_memory_range: - min: 12288 - max: 101537328 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 130 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 130 - job_id: jg9lyjwwg + job_id: jpxk78k85 job_status: Passed reference_device_info: name: Samsung 
Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:51:27Z' + timestamp: '2024-11-09T23:47:00Z' - torchscript_onnx_tflite: - inference_time: 393.0 - throughput: 2544.529262086514 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 8192 - max: 25343680 + max: 24632816 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jprv2820g + job_id: jpv61y6z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 434.0 - throughput: 2304.147465437788 + inference_time: 489.0 + throughput: 2044.9897750511248 estimated_peak_memory_range: - min: 163840 - max: 16594976 + min: 0 + max: 17078256 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgjv2zevg - job_status: Passed - torchscript_onnx: - inference_time: 613.0 - throughput: 1631.3213703099511 - estimated_peak_memory_range: - min: 0 - max: 34808592 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 130 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 130 - job_id: jgdxqeorp + job_id: j5mnw1n7p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:51:31Z' + timestamp: '2024-11-09T23:47:02Z' - torchscript_onnx_tflite: - inference_time: 2334.0 - throughput: 428.4490145672665 + inference_time: 2574.0 + throughput: 388.5003885003885 estimated_peak_memory_range: - min: 32768 - max: 28979792 + min: 0 + max: 26897856 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp14wyw8p + job_id: jgjv06v1g job_status: Passed torchscript_onnx_qnn: - inference_time: 
3189.0 - throughput: 313.5779241141424 + inference_time: 2843.0 + throughput: 351.74111853675697 estimated_peak_memory_range: - min: 204800 - max: 8185136 + min: 32768 + max: 8367776 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgkeq7qwg + job_id: jgn69d6j5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:51:07Z' + timestamp: '2024-11-09T23:46:43Z' - torchscript_onnx_tflite: - inference_time: 7627.0 - throughput: 131.1131506490101 + inference_time: 7776.0 + throughput: 128.6008230452675 estimated_peak_memory_range: - min: 53248 - max: 1905896 + min: 12288 + max: 2291424 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgdxqeqrp + job_id: jpedr0d85 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:50:48Z' + timestamp: '2024-11-09T23:46:26Z' - torchscript_onnx_tflite: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: - min: 24576 - max: 1357312 + min: 20480 + max: 3921280 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j57yl0lv5 + job_id: jgz3xq345 job_status: Passed torchscript_onnx_qnn: - inference_time: 649.0 - throughput: 1540.8320493066255 + inference_time: 655.0 + throughput: 1526.7175572519084 estimated_peak_memory_range: - min: 184320 - max: 1368328 + min: 0 + max: 1174264 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: 
j5q6rernp + job_id: jprv4mvkg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:51:09Z' + timestamp: '2024-11-09T23:46:45Z' - torchscript_onnx_tflite: - inference_time: 598.0 - throughput: 1672.2408026755852 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: min: 12288 - max: 20087624 + max: 15166824 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j5we3ne35 + job_id: j5wed0e45 job_status: Passed torchscript_onnx_qnn: - inference_time: 647.0 - throughput: 1545.595054095827 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: - min: 192512 - max: 1500128 + min: 180224 + max: 1659344 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j57yl3yv5 + job_id: jpy14k10p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:07:35Z' + timestamp: '2024-11-09T23:46:48Z' - torchscript_onnx_tflite: - inference_time: 582.0 - throughput: 1718.213058419244 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: min: 16384 - max: 21097384 + max: 9749432 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jg9lyelwg + job_id: jg9l37lmg job_status: Passed torchscript_onnx_qnn: - inference_time: 646.0 - throughput: 1547.9876160990711 + inference_time: 648.0 + throughput: 1543.20987654321 estimated_peak_memory_range: - min: 208896 - max: 1647240 + min: 184320 + max: 1472296 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp4ld0l85 + job_id: jp0z18405 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:07:36Z' + timestamp: '2024-11-09T23:46:50Z' - torchscript_onnx_tflite: - inference_time: 586.0 - throughput: 1706.4846416382252 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: - min: 12288 - max: 1634960 + min: 16384 + max: 5231976 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgdxqlxrp + job_id: jp14dk4np job_status: Passed torchscript_onnx_qnn: - inference_time: 654.0 - throughput: 1529.051987767584 + inference_time: 651.0 + throughput: 1536.0983102918588 estimated_peak_memory_range: min: 180224 - max: 1616176 + max: 1374672 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5mn6yndp + job_id: jp8q3d2qp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:07:40Z' + timestamp: '2024-11-09T23:46:52Z' - torchscript_onnx_tflite: - inference_time: 1075.0 - throughput: 930.2325581395348 + inference_time: 1082.0 + throughput: 924.2144177449168 estimated_peak_memory_range: min: 12288 - max: 23391488 + max: 23345376 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp14wx48p + job_id: jgdxryx6p job_status: Passed torchscript_onnx_qnn: - inference_time: 1485.0 - throughput: 673.4006734006734 + inference_time: 1310.0 + throughput: 763.3587786259542 estimated_peak_memory_range: - min: 167936 - max: 5885344 + min: 0 + max: 5887536 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpxk62k35 + job_id: jgkelwvvg job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:07:38Z' + timestamp: '2024-11-09T23:46:54Z' - torchscript_onnx_tflite: - inference_time: 701.0 - throughput: 1426.5335235378031 + inference_time: 713.0 + throughput: 1402.5245441795232 estimated_peak_memory_range: - min: 12288 - max: 75014096 + min: 16384 + max: 73590800 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgn6mlmk5 + job_id: j57yj1yn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 769.0 - throughput: 1300.3901170351105 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 159744 - max: 20187456 + min: 167936 + max: 22091728 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpv6r0vk5 + job_id: j5q67x0ep job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:51:18Z' + timestamp: '2024-11-09T23:46:56Z' - torchscript_onnx_qnn: - inference_time: 735.0 - throughput: 1360.544217687075 + inference_time: 728.0 + throughput: 1373.6263736263736 estimated_peak_memory_range: - min: 471040 - max: 471040 + min: 421888 + max: 421888 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jglv262j5 - job_status: Passed - torchscript_onnx: - inference_time: 805.0 - throughput: 1242.2360248447205 - estimated_peak_memory_range: - min: 27521024 - max: 27521024 - primary_compute_unit: NPU - precision: 
int8 - layer_info: - layers_on_npu: 130 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 130 - job_id: jp14wye8p + job_id: jp2k7qk6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:51:29Z' + timestamp: '2024-11-09T23:47:04Z' diff --git a/qai_hub_models/models/indus_1b_quantized/README.md b/qai_hub_models/models/indus_1b_quantized/README.md index 4950ce2e..f474206c 100644 --- a/qai_hub_models/models/indus_1b_quantized/README.md +++ b/qai_hub_models/models/indus_1b_quantized/README.md @@ -5,8 +5,7 @@ Indus is today a 1.2 billion parameter model and has been supervised fine tuned for Hindi and dialects. -This is based on the implementation of IndusQ-1.1B found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/indus_1b_quantized). diff --git a/qai_hub_models/models/jais_6p7b_chat_quantized/README.md b/qai_hub_models/models/jais_6p7b_chat_quantized/README.md index fb207748..da60bb5f 100644 --- a/qai_hub_models/models/jais_6p7b_chat_quantized/README.md +++ b/qai_hub_models/models/jais_6p7b_chat_quantized/README.md @@ -5,8 +5,7 @@ JAIS 6.7B is a bilingual large language model (LLM) for both Arabic and English developed by Inception, a G42 company in partnership with MBZUAI and Cerebras. This is a 6.7 billion parameter LLM, trained on a dataset containing 141 billion Arabic tokens and 339 billion English/code tokens. The model is based on transformer-based decoder-only (GPT-3) architecture and uses SwiGLU non-linearity. 
It implements ALiBi position embeddings, enabling the model to extrapolate to long sequence lengths, providing improved context handling and model precision. The JAIS family of models is a comprehensive series of bilingual English-Arabic LLMs. These models are optimized to excel in Arabic while having strong English capabilities. -This is based on the implementation of JAIS-6p7b-Chat found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/jais_6p7b_chat_quantized). diff --git a/qai_hub_models/models/lama_dilated/README.md b/qai_hub_models/models/lama_dilated/README.md index b84123a9..696b2f3d 100644 --- a/qai_hub_models/models/lama_dilated/README.md +++ b/qai_hub_models/models/lama_dilated/README.md @@ -5,8 +5,7 @@ LaMa-Dilated is a machine learning model that allows to erase and in-paint part of given input image. -This is based on the implementation of LaMa-Dilated found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/lama_dilated). 
diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index 8b248e68..a8e2b042 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 74864.0 - throughput: 13.357555033126737 + inference_time: 74751.0 + throughput: 13.377747454883547 estimated_peak_memory_range: - min: 3260416 - max: 137900432 + min: 208896 + max: 302711536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgdxor66p + job_id: jgz3xzjz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 70395.0 - throughput: 14.205554371759359 + inference_time: 70397.0 + throughput: 14.205150787675612 estimated_peak_memory_range: - min: 4259840 - max: 40825824 + min: 5943296 + max: 37461552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: j57yxjon5 + job_id: j57yjoxr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -77,36 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:45:12Z' - - torchscript_onnx_qnn: - inference_time: 70453.0 - throughput: 14.193859736278085 - estimated_peak_memory_range: - min: 4407296 - max: 5784328 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 332 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 332 - job_id: jp4lvxe25 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - 
form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:45:13Z' + timestamp: '2024-11-09T22:48:09Z' - torchscript_onnx_tflite: - inference_time: 63057.0 - throughput: 15.85866755475205 + inference_time: 51416.0 + throughput: 19.449198693013848 estimated_peak_memory_range: - min: 2269184 - max: 275085680 + min: 3174400 + max: 275110144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -114,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgo2n6l4p + job_id: j5wedy3z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50132.0 - throughput: 19.947339024974067 + inference_time: 48637.0 + throughput: 20.560478647942926 estimated_peak_memory_range: - min: 4337664 - max: 12886000 + min: 258842624 + max: 349200896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jpxky7085 + job_id: jp4lxevl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:45:14Z' + timestamp: '2024-11-09T22:48:10Z' - torchscript_onnx_tflite: - inference_time: 49012.0 - throughput: 20.40316657145189 + inference_time: 49097.0 + throughput: 20.367843249078355 estimated_peak_memory_range: - min: 2396160 - max: 170253792 + min: 1511424 + max: 168366032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jg9lynzqg + job_id: jg9l3oyqg job_status: Passed torchscript_onnx_qnn: - inference_time: 38013.0 - throughput: 26.30678978244285 + inference_time: 45717.0 + throughput: 21.87370124898834 estimated_peak_memory_range: - min: 1392640 - max: 92325760 + min: 0 + max: 91198560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,7 +146,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 332 - job_id: j5mn3w97p + job_id: jpxk70y95 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -176,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:45:15Z' + timestamp: '2024-11-09T22:48:12Z' - torchscript_onnx_tflite: - inference_time: 74524.0 - throughput: 13.41849605496216 + inference_time: 74982.0 + throughput: 13.336534101517698 estimated_peak_memory_range: - min: 3268608 - max: 138125496 + min: 3244032 + max: 137835904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -190,14 +169,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jpv6rkl75 + job_id: jp14dowkp job_status: Passed torchscript_onnx_qnn: - inference_time: 70301.0 - throughput: 14.224548726191662 + inference_time: 70211.0 + throughput: 14.242782469983336 estimated_peak_memory_range: - min: 4362240 - max: 5477784 + min: 3235840 + max: 6488696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +184,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jgn6391j5 + job_id: j5mnw93qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +193,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:45:16Z' + timestamp: '2024-11-09T22:48:13Z' - torchscript_onnx_tflite: - inference_time: 75426.0 - throughput: 13.258027735794023 + inference_time: 74665.0 + throughput: 13.393156097234312 estimated_peak_memory_range: - min: 3268608 - max: 137783160 + min: 3235840 + max: 137314448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,14 +207,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgkeqn7ng + job_id: jgdxr6qkp job_status: Passed torchscript_onnx_qnn: - inference_time: 70345.0 - throughput: 14.215651432226881 + inference_time: 70377.0 + throughput: 14.209187660741435 estimated_peak_memory_range: - min: 4440064 - 
max: 5713768 + min: 4403200 + max: 6102448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +222,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: j56yzjzyp + job_id: jprv4xeeg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -252,13 +231,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:40:07Z' + timestamp: '2024-11-09T22:48:15Z' - torchscript_onnx_tflite: - inference_time: 74647.0 - throughput: 13.39638565515024 + inference_time: 75800.0 + throughput: 13.192612137203167 estimated_peak_memory_range: - min: 151552 - max: 2286016 + min: 3264512 + max: 137854880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,14 +245,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: j5q6rkeop + job_id: j5wedy3j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 70941.0 - throughput: 14.09622080320266 + inference_time: 70599.0 + throughput: 14.164506579413306 estimated_peak_memory_range: - min: 4182016 - max: 5576560 + min: 4427776 + max: 5716688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -281,7 +260,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jgo2n0nkp + job_id: jp2k7olmp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -290,13 +269,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:40:08Z' + timestamp: '2024-11-09T22:48:16Z' - torchscript_onnx_tflite: - inference_time: 74817.0 - throughput: 13.365946242164213 + inference_time: 75145.0 + throughput: 13.307605296426908 estimated_peak_memory_range: - min: 3293184 - max: 138230944 + min: 3289088 + max: 137993536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -304,14 +283,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jglv2z2m5 + job_id: jg9l3oyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 71208.0 - throughput: 
14.043365913942253 + inference_time: 70339.0 + throughput: 14.216864044129146 estimated_peak_memory_range: - min: 4485120 - max: 5795320 + min: 3358720 + max: 6565248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -319,7 +298,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jpv6rorr5 + job_id: jpy14864p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -328,13 +307,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:40:09Z' + timestamp: '2024-11-09T22:48:17Z' - torchscript_onnx_tflite: - inference_time: 106161.0 - throughput: 9.419655052231986 + inference_time: 115903.0 + throughput: 8.627904368307982 estimated_peak_memory_range: - min: 3198976 - max: 168035104 + min: 3313664 + max: 93192160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -342,14 +321,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: j5we36lz5 + job_id: jp14dowlp job_status: Passed torchscript_onnx_qnn: - inference_time: 102707.0 - throughput: 9.736434712337037 + inference_time: 103899.0 + throughput: 9.624731710603568 estimated_peak_memory_range: - min: 4292608 - max: 48716928 + min: 1265664 + max: 7125024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -357,22 +336,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jp8qz3rqp + job_id: jp8q3jz8p job_status: Passed reference_device_info: - name: QCS8450 (Proxy) - os: '13' - form_factor: Xr + name: SA8295P ADP + os: '14' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:45:20Z' - - torchscript_onnx_qnn: - inference_time: 71994.0 - throughput: 13.890046392754952 + chipset: SA8295P + timestamp: '2024-11-09T22:48:18Z' + - torchscript_onnx_tflite: + inference_time: 114611.0 + throughput: 8.72516599628308 estimated_peak_memory_range: - min: 4202496 - max: 4202496 + min: 3551232 + max: 169958528 + primary_compute_unit: NPU 
+ precision: fp16 + layer_info: + layers_on_npu: 343 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 343 + job_id: jgdxr6qlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 99332.0 + throughput: 10.06724922482181 + estimated_peak_memory_range: + min: 4120576 + max: 48895808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -380,13 +374,21 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jprve4xkg + job_id: jgkel63og job_status: Passed reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-11-09T22:48:20Z' + - reference_device_info: name: Snapdragon X Elite CRD os: '11' form_factor: Compute os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:45:17Z' + timestamp: '2024-11-09T22:48:14Z' diff --git a/qai_hub_models/models/litehrnet/README.md b/qai_hub_models/models/litehrnet/README.md index 8084a6c2..ba1924c7 100644 --- a/qai_hub_models/models/litehrnet/README.md +++ b/qai_hub_models/models/litehrnet/README.md @@ -5,8 +5,7 @@ LiteHRNet is a machine learning model that detects human pose and returns a location and confidence for each of 17 joints. -This is based on the implementation of LiteHRNet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/litehrnet). 
diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 36f52b50..888e5105 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -205,9 +205,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index fe5d0ac9..7b18b5a3 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 7955.0 - throughput: 125.70710245128849 - estimated_peak_memory_range: - min: 270336 - max: 2505216 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1233 - layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 1235 - job_id: jgz3jdm45 - job_status: Passed - torchscript_onnx: - inference_time: 7192.0 - throughput: 139.04338153503892 - estimated_peak_memory_range: - min: 1036288 - max: 3780392 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1254 - layers_on_gpu: 0 - layers_on_cpu: 4 - total_layers: 1258 - job_id: jprv23lvg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:00:44Z' - - torchscript_onnx_tflite: - inference_time: 7966.0 - throughput: 
125.53351744915892 + inference_time: 7964.0 + throughput: 125.56504269211452 estimated_peak_memory_range: min: 249856 - max: 3973912 + max: 2292608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: j5we36745 + job_id: jp14doykp job_status: Passed torchscript_onnx: - inference_time: 7220.0 - throughput: 138.50415512465375 + inference_time: 7174.0 + throughput: 139.39224979091162 estimated_peak_memory_range: - min: 20480 - max: 6733120 + min: 200704 + max: 167438824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jp2k9yrxp + job_id: j5q674r7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:00:45Z' + timestamp: '2024-11-09T22:47:27Z' - torchscript_onnx_tflite: - inference_time: 4923.0 - throughput: 203.12817387771685 + inference_time: 4917.0 + throughput: 203.3760423022168 estimated_peak_memory_range: min: 241664 - max: 99857072 + max: 102018688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jg9lynmmg + job_id: jgdxr6ekp job_status: Passed torchscript_onnx: - inference_time: 4583.0 - throughput: 218.1976871045167 + inference_time: 4418.0 + throughput: 226.34676324128566 estimated_peak_memory_range: - min: 606208 - max: 111396304 + min: 1028096 + max: 114115888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jpy1j3orp + job_id: jglv0w2e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:00:46Z' + timestamp: 
'2024-11-09T22:47:28Z' - torchscript_onnx_tflite: - inference_time: 4343.0 - throughput: 230.25558369790468 + inference_time: 5295.0 + throughput: 188.85741265344666 estimated_peak_memory_range: - min: 12288 - max: 71218560 + min: 221184 + max: 71241552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jgdxq13kp + job_id: j57yjo0q5 job_status: Passed torchscript_onnx: - inference_time: 4850.0 - throughput: 206.18556701030928 + inference_time: 4829.0 + throughput: 207.08221163802028 estimated_peak_memory_range: - min: 1028096 - max: 83438864 + min: 1024000 + max: 83296704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jp8qmyezp + job_id: j56y3ozvp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:00:48Z' + timestamp: '2024-11-09T22:47:29Z' - torchscript_onnx_tflite: - inference_time: 7926.0 - throughput: 126.16704516780217 + inference_time: 7939.0 + throughput: 125.96044841919637 estimated_peak_memory_range: - min: 266240 - max: 2514048 + min: 225280 + max: 158385688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp14wzjnp + job_id: jp4lxekq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:00:29Z' + timestamp: '2024-11-09T22:47:13Z' - torchscript_onnx_tflite: - inference_time: 7921.0 - throughput: 126.24668602449185 + inference_time: 7958.0 + throughput: 125.65971349585323 estimated_peak_memory_range: - min: 249856 - max: 3395312 + min: 253952 + max: 2202000 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jprv2q87g + job_id: jpxk70nj5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:39:33Z' + timestamp: '2024-11-09T22:47:14Z' - torchscript_onnx_tflite: - inference_time: 7956.0 - throughput: 125.69130216189039 + inference_time: 7942.0 + throughput: 125.91286829513976 estimated_peak_memory_range: - min: 266240 - max: 2661712 + min: 245760 + max: 2703864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp2k960qp + job_id: j5mnw9qyp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:39:34Z' + timestamp: '2024-11-09T22:47:16Z' - torchscript_onnx_tflite: - inference_time: 7949.0 - throughput: 125.80198767140521 + inference_time: 7921.0 + throughput: 126.24668602449185 estimated_peak_memory_range: - min: 258048 - max: 2469416 + min: 270336 + max: 2345592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jpy1jwrlp + job_id: jgn691mv5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:39:35Z' + timestamp: '2024-11-09T22:47:17Z' - torchscript_onnx_tflite: - inference_time: 8583.0 - throughput: 116.5093790050099 + inference_time: 9944.0 + throughput: 100.5631536604988 estimated_peak_memory_range: - min: 249856 - max: 89100448 + min: 229376 + max: 63910720 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1233 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 1235 + 
job_id: jprv4x2vg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:47:18Z' + - torchscript_onnx_tflite: + inference_time: 8569.0 + throughput: 116.69973159061735 + estimated_peak_memory_range: + min: 262144 + max: 89294528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp14wzjkp + job_id: jp2k7o9xp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:00:34Z' + timestamp: '2024-11-09T22:47:19Z' - torchscript_onnx: - inference_time: 8090.0 - throughput: 123.60939431396787 + inference_time: 8124.0 + throughput: 123.09207287050714 estimated_peak_memory_range: - min: 4718592 - max: 4718592 + min: 6094848 + max: 6094848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jp0z20m25 + job_id: jp3j4o1xg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +316,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:00:47Z' + timestamp: '2024-11-09T22:47:30Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index 0463700d..94fa9334 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -5,8 +5,7 @@ Llama 2 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. 
The model is quantized to w4a16(4-bit weights and 16-bit activations) and part of the model is quantized to w8a16(8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-KVCache-Quantized's latency. -This is based on the implementation of Llama-v2-7B-Chat found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/llama_v2_7b_chat_quantized). diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py index 78e97d21..504df37c 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/export.py @@ -306,6 +306,10 @@ def export_model( for sub_component_name in ALL_SUB_COMPONENTS[component_name] } + print( + "These models can be deployed on-device using the Genie SDK. For a full tutorial, please follow the instructions here: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie." + ) + def main(): warnings.filterwarnings("ignore") diff --git a/qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md b/qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md index b49510f6..f8642a13 100644 --- a/qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v3_1_8b_chat_quantized/README.md @@ -5,8 +5,7 @@ Llama 3 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. 
The model is quantized to w4a16 (4-bit weights and 16-bit activations) and part of the model is quantized to w8a16 (8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-Quantized's latency. -This is based on the implementation of Llama-v3.1-8B-Chat found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/llama_v3_1_8b_chat_quantized). diff --git a/qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md b/qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md index fcd3721b..5cf5092c 100644 --- a/qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v3_2_3b_chat_quantized/README.md @@ -5,8 +5,7 @@ Llama 3 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to w4a16 (4-bit weights and 16-bit activations) and part of the model is quantized to w8a16 (8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-Quantized's latency. -This is based on the implementation of Llama-v3.2-3B-Chat found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/llama_v3_2_3b_chat_quantized). diff --git a/qai_hub_models/models/llama_v3_2_3b_chat_quantized/export.py b/qai_hub_models/models/llama_v3_2_3b_chat_quantized/export.py index 4784b5d9..4ec19ac9 100644 --- a/qai_hub_models/models/llama_v3_2_3b_chat_quantized/export.py +++ b/qai_hub_models/models/llama_v3_2_3b_chat_quantized/export.py @@ -34,7 +34,7 @@ def main(): parser = export_parser( model_cls=Model, supports_tflite=False, - supports_precompiled_qnn_onnx=False, + supports_onnx=False, default_export_device=DEFAULT_EXPORT_DEVICE, ) parser.add_argument( diff --git a/qai_hub_models/models/llama_v3_8b_chat_quantized/README.md b/qai_hub_models/models/llama_v3_8b_chat_quantized/README.md index c2ca1a4b..9ee03499 100644 --- a/qai_hub_models/models/llama_v3_8b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v3_8b_chat_quantized/README.md @@ -5,8 +5,7 @@ Llama 3 is a family of LLMs. The "Chat" at the end indicates that the model is optimized for chatbot-like dialogue. The model is quantized to w4a16 (4-bit weights and 16-bit activations) and part of the model is quantized to w8a16 (8-bit weights and 16-bit activations) making it suitable for on-device deployment. For Prompt and output length specified below, the time to first token is Llama-PromptProcessor-Quantized's latency and average time per addition token is Llama-TokenGenerator-Quantized's latency. -This is based on the implementation of Llama-v3-8B-Chat found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/llama_v3_8b_chat_quantized). 
diff --git a/qai_hub_models/models/llama_v3_8b_chat_quantized/export.py b/qai_hub_models/models/llama_v3_8b_chat_quantized/export.py index 3ed9600d..a18bff6b 100644 --- a/qai_hub_models/models/llama_v3_8b_chat_quantized/export.py +++ b/qai_hub_models/models/llama_v3_8b_chat_quantized/export.py @@ -34,7 +34,7 @@ def main(): parser = export_parser( model_cls=Model, supports_tflite=False, - supports_precompiled_qnn_onnx=False, + supports_onnx=False, default_export_device=DEFAULT_EXPORT_DEVICE, ) parser.add_argument( diff --git a/qai_hub_models/models/mediapipe_face/README.md b/qai_hub_models/models/mediapipe_face/README.md index 9d82ada2..a982de1f 100644 --- a/qai_hub_models/models/mediapipe_face/README.md +++ b/qai_hub_models/models/mediapipe_face/README.md @@ -5,8 +5,7 @@ Designed for sub-millisecond processing, this model predicts bounding boxes and pose skeletons (left eye, right eye, nose tip, mouth, left eye tragion, and right eye tragion) of faces in an image. -This is based on the implementation of MediaPipe-Face-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_face). 
diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index a494de5e..bc95ba5f 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 549.0 - throughput: 1821.4936247723133 + inference_time: 545.0 + throughput: 1834.8623853211009 estimated_peak_memory_range: - min: 12288 - max: 1379448 + min: 16384 + max: 2025744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jgkeqx9vg + job_id: jglv0wve5 job_status: Passed torchscript_onnx_qnn: - inference_time: 623.0 - throughput: 1605.1364365971108 + inference_time: 628.0 + throughput: 1592.3566878980891 estimated_peak_memory_range: - min: 823296 - max: 5867584 + min: 806912 + max: 5801472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgn6mv7j5 + job_id: jp2k7oxxp job_status: Passed torchscript_onnx: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 987.0 + throughput: 1013.1712259371834 estimated_peak_memory_range: - min: 266240 - max: 3127728 + min: 397312 + max: 2682544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j57ylr9n5 + job_id: jpxk709j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-26T23:50:50Z' + timestamp: '2024-11-09T22:35:54Z' - torchscript_onnx_tflite: - inference_time: 549.0 - throughput: 1821.4936247723133 + inference_time: 409.0 + throughput: 2444.987775061125 estimated_peak_memory_range: - min: 20480 - max: 1533696 + min: 16384 + max: 35140304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jglv2m125 + job_id: jp3j4ojxg job_status: Passed torchscript_onnx_qnn: - inference_time: 624.0 - throughput: 1602.5641025641025 + inference_time: 461.0 + throughput: 2169.1973969631235 estimated_peak_memory_range: - min: 811008 - max: 78629168 + min: 0 + max: 15518096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp2k9yv6p + job_id: jp0z1o425 job_status: Passed torchscript_onnx: - inference_time: 1006.0 - throughput: 994.0357852882704 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: - min: 655360 - max: 79986656 + min: 0 + max: 40463744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpxk6ox85 + job_id: jgn691lv5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:50:52Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:35:56Z' - torchscript_onnx_tflite: - inference_time: 490.0 - throughput: 2040.8163265306123 + inference_time: 434.0 + throughput: 2304.147465437788 estimated_peak_memory_range: - min: 12288 - max: 36066288 + min: 8192 + max: 24684976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jp3j10wmg + job_id: jpv612675 
job_status: Passed torchscript_onnx_qnn: - inference_time: 506.0 - throughput: 1976.2845849802372 + inference_time: 457.0 + throughput: 2188.183807439825 estimated_peak_memory_range: - min: 802816 - max: 16451952 + min: 0 + max: 12706384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp0z20605 + job_id: jgkel6vyg job_status: Passed torchscript_onnx: - inference_time: 814.0 - throughput: 1228.5012285012285 + inference_time: 749.0 + throughput: 1335.1134846461948 estimated_peak_memory_range: min: 0 - max: 40470592 + max: 28672544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgn6mvkj5 + job_id: jp2k7o0xp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:50:54Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:35:58Z' - torchscript_onnx_tflite: - inference_time: 435.0 - throughput: 2298.8505747126437 + inference_time: 544.0 + throughput: 1838.235294117647 estimated_peak_memory_range: - min: 8192 - max: 25363568 + min: 24576 + max: 1420368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jpxk6od85 + job_id: jpedr6d75 job_status: Passed torchscript_onnx_qnn: - inference_time: 458.0 - throughput: 2183.406113537118 + inference_time: 602.0 + throughput: 1661.1295681063123 estimated_peak_memory_range: - min: 0 - max: 12416736 + min: 819200 + max: 2113560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp14wz6np - job_status: Passed - torchscript_onnx: - 
inference_time: 752.0 - throughput: 1329.787234042553 - estimated_peak_memory_range: - min: 0 - max: 28908448 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp0z20j05 + job_id: jglv0w4e5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:50:57Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:35:40Z' - torchscript_onnx_tflite: inference_time: 547.0 throughput: 1828.1535648994516 estimated_peak_memory_range: min: 12288 - max: 1348848 + max: 8192952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jpv6rk9z5 + job_id: j5wedyez5 job_status: Passed torchscript_onnx_qnn: - inference_time: 604.0 - throughput: 1655.6291390728477 + inference_time: 608.0 + throughput: 1644.7368421052631 estimated_peak_memory_range: - min: 811008 - max: 2139568 + min: 823296 + max: 2399352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgkeqx8vg + job_id: jpv612q75 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:50:36Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:35:43Z' - torchscript_onnx_tflite: - inference_time: 545.0 - throughput: 1834.8623853211009 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: - min: 12288 - max: 74946208 + min: 24576 + max: 1385088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 111 - job_id: j5we3vw35 + job_id: jp14do4kp job_status: Passed torchscript_onnx_qnn: - inference_time: 603.0 - throughput: 1658.374792703151 + inference_time: 612.0 + throughput: 1633.986928104575 estimated_peak_memory_range: - min: 24576 - max: 1289264 + min: 827392 + max: 2321816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp14wl27p + job_id: jpedr6o75 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:29:29Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:35:45Z' - torchscript_onnx_tflite: - inference_time: 549.0 - throughput: 1821.4936247723133 + inference_time: 558.0 + throughput: 1792.1146953405018 estimated_peak_memory_range: - min: 0 - max: 2613296 + min: 24576 + max: 5185768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jp14wl28p + job_id: j57yjoyq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 605.0 - throughput: 1652.892561983471 + inference_time: 602.0 + throughput: 1661.1295681063123 estimated_peak_memory_range: - min: 2240512 - max: 3829936 + min: 811008 + max: 2448992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j57ylw295 + job_id: j5wedywz5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:29:31Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:35:47Z' - torchscript_onnx_tflite: - inference_time: 549.0 - throughput: 1821.4936247723133 + inference_time: 1135.0 + throughput: 
881.0572687224669 estimated_peak_memory_range: - min: 24576 - max: 1420896 + min: 12288 + max: 22268640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: j5we3vwm5 + job_id: jpxk70kj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 600.0 - throughput: 1666.6666666666667 + inference_time: 1407.0 + throughput: 710.7320540156361 estimated_peak_memory_range: - min: 16384 - max: 1347976 + min: 0 + max: 5812720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpxk6j9l5 + job_id: jp14do2kp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:29:32Z' + chipset: SA8295P + timestamp: '2024-11-09T22:35:50Z' - torchscript_onnx_tflite: - inference_time: 747.0 - throughput: 1338.6880856760374 + inference_time: 756.0 + throughput: 1322.7513227513227 estimated_peak_memory_range: - min: 16384 - max: 33412688 + min: 73728 + max: 31604048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: j57ylr7n5 + job_id: jgn6910v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 833.0 + throughput: 1200.4801920768307 estimated_peak_memory_range: min: 802816 - max: 17115328 + max: 17662432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5we36445 + job_id: j57yjo2q5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:50:46Z' + timestamp: 
'2024-11-09T22:35:52Z' - torchscript_onnx_qnn: - inference_time: 788.0 - throughput: 1269.0355329949239 + inference_time: 778.0 + throughput: 1285.3470437017995 estimated_peak_memory_range: min: 786432 max: 786432 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jglv2ml25 + job_id: jp3j4onxg job_status: Passed torchscript_onnx: - inference_time: 1033.0 - throughput: 968.054211035818 + inference_time: 1060.0 + throughput: 943.3962264150944 estimated_peak_memory_range: - min: 2031616 - max: 2031616 + min: 2228224 + max: 2228224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp2k9ye6p + job_id: jp0z1o325 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,15 +466,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:50:55Z' + timestamp: '2024-11-09T22:35:59Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: inference_time: 190.0 throughput: 5263.1578947368425 estimated_peak_memory_range: - min: 12288 - max: 1391936 + min: 36864 + max: 1811976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -495,60 +482,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: j5q6rqmep + job_id: j56y3oyvp job_status: Passed - torchscript_onnx: - inference_time: 503.0 - throughput: 1988.0715705765408 - estimated_peak_memory_range: - min: 131072 - max: 5324976 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 106 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 106 - job_id: jp4ldr325 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:50:51Z' - - torchscript_onnx_tflite: - inference_time: 190.0 - throughput: 
5263.1578947368425 + torchscript_onnx_qnn: + inference_time: 283.0 + throughput: 3533.5689045936397 estimated_peak_memory_range: - min: 0 - max: 1389744 + min: 475136 + max: 8845000 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 100 + layers_on_npu: 105 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: j56yz4dnp + total_layers: 105 + job_id: jpy148zrp job_status: Passed - torchscript_onnx_qnn: - inference_time: 280.0 - throughput: 3571.4285714285716 + torchscript_onnx: + inference_time: 509.0 + throughput: 1964.6365422396857 estimated_peak_memory_range: - min: 12288 - max: 9900696 + min: 20480 + max: 2723696 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 105 + layers_on_npu: 106 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 105 - job_id: jpy1j370p + total_layers: 106 + job_id: j5mnw9eyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -557,13 +521,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:50:52Z' + timestamp: '2024-11-09T22:35:54Z' - torchscript_onnx_tflite: - inference_time: 150.0 - throughput: 6666.666666666667 + inference_time: 146.0 + throughput: 6849.315068493151 estimated_peak_memory_range: - min: 16384 - max: 30567392 + min: 12288 + max: 29861776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -571,14 +535,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jgo2n641p + job_id: jgo21d24p job_status: Passed torchscript_onnx_qnn: - inference_time: 216.0 - throughput: 4629.62962962963 + inference_time: 206.0 + throughput: 4854.368932038835 estimated_peak_memory_range: min: 0 - max: 12947376 + max: 12118096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -586,14 +550,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp8qmy1qp + job_id: jp8q3j2zp job_status: Passed torchscript_onnx: - inference_time: 404.0 - throughput: 2475.2475247524753 + 
inference_time: 381.0 + throughput: 2624.6719160104985 estimated_peak_memory_range: min: 0 - max: 33155168 + max: 32661456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -601,7 +565,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jprv23wkg + job_id: jprv4x8vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -610,13 +574,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:50:54Z' + timestamp: '2024-11-09T22:35:56Z' - torchscript_onnx_tflite: - inference_time: 127.0 - throughput: 7874.0157480314965 + inference_time: 143.0 + throughput: 6993.006993006993 estimated_peak_memory_range: - min: 12288 - max: 18806416 + min: 8192 + max: 18978576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -624,14 +588,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: j5mn6xd7p + job_id: jgjv03v7g job_status: Passed torchscript_onnx_qnn: inference_time: 174.0 throughput: 5747.126436781609 estimated_peak_memory_range: min: 0 - max: 10058624 + max: 10330752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -639,14 +603,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgdxq126p + job_id: j5q67407p job_status: Passed torchscript_onnx: - inference_time: 407.0 - throughput: 2457.002457002457 + inference_time: 403.0 + throughput: 2481.3895781637716 estimated_peak_memory_range: min: 0 - max: 19036192 + max: 19263440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -654,7 +618,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp8qmyxqp + job_id: jpy148rrp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -663,13 +627,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:50:58Z' + timestamp: '2024-11-09T22:35:58Z' - torchscript_onnx_tflite: - inference_time: 192.0 - throughput: 5208.333333333333 + 
inference_time: 190.0 + throughput: 5263.1578947368425 estimated_peak_memory_range: min: 12288 - max: 1279208 + max: 1314576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -677,14 +641,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jgjv2nw1g + job_id: jgz3xz3z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 275.0 - throughput: 3636.3636363636365 + inference_time: 284.0 + throughput: 3521.1267605633802 estimated_peak_memory_range: min: 466944 - max: 1740224 + max: 1665480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -692,7 +656,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j5q6rqvep + job_id: j56y3o2vp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -701,13 +665,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:50:37Z' + timestamp: '2024-11-09T22:35:40Z' - torchscript_onnx_tflite: - inference_time: 194.0 - throughput: 5154.639175257732 + inference_time: 190.0 + throughput: 5263.1578947368425 estimated_peak_memory_range: - min: 45056 - max: 1503280 + min: 32768 + max: 1522288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -715,14 +679,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jg9ly10wg + job_id: jg9l3olqg job_status: Passed torchscript_onnx_qnn: - inference_time: 274.0 - throughput: 3649.6350364963505 + inference_time: 275.0 + throughput: 3636.3636363636365 estimated_peak_memory_range: - min: 479232 - max: 1858680 + min: 45056 + max: 1391816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -730,7 +694,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgdxq9nzp + job_id: jgjv03d7g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -739,13 +703,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:29:29Z' + timestamp: '2024-11-09T22:35:44Z' - 
torchscript_onnx_tflite: - inference_time: 190.0 - throughput: 5263.1578947368425 + inference_time: 192.0 + throughput: 5208.333333333333 estimated_peak_memory_range: - min: 53248 - max: 10491888 + min: 24576 + max: 6381864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -753,14 +717,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jgdxq9nrp + job_id: jgdxr6xkp job_status: Passed torchscript_onnx_qnn: inference_time: 278.0 throughput: 3597.122302158273 estimated_peak_memory_range: - min: 479232 - max: 1832048 + min: 475136 + max: 1689928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -768,7 +732,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp4ldon15 + job_id: jgz3xz2z5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -777,13 +741,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:29:31Z' + timestamp: '2024-11-09T22:35:46Z' - torchscript_onnx_tflite: - inference_time: 189.0 - throughput: 5291.005291005291 + inference_time: 191.0 + throughput: 5235.602094240838 estimated_peak_memory_range: - min: 28672 - max: 1428856 + min: 20480 + max: 1431568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -791,14 +755,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jg9ly108g + job_id: jp4lxelq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 276.0 - throughput: 3623.1884057971015 + inference_time: 279.0 + throughput: 3584.2293906810037 estimated_peak_memory_range: - min: 471040 - max: 1832344 + min: 466944 + max: 1875384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -806,7 +770,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j5mn62e9p + job_id: jg9l3o0qg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -815,13 +779,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:29:33Z' + 
timestamp: '2024-11-09T22:35:48Z' - torchscript_onnx_tflite: - inference_time: 276.0 - throughput: 3623.1884057971015 + inference_time: 562.0 + throughput: 1779.3594306049822 estimated_peak_memory_range: - min: 20480 - max: 29821328 + min: 16384 + max: 18110640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -829,14 +793,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jp4ldr925 + job_id: j5mnw9nyp job_status: Passed torchscript_onnx_qnn: - inference_time: 366.0 - throughput: 2732.24043715847 + inference_time: 762.0 + throughput: 1312.3359580052493 estimated_peak_memory_range: min: 0 - max: 14132528 + max: 5940096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jgdxr6nkp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:35:50Z' + - torchscript_onnx_tflite: + inference_time: 278.0 + throughput: 3597.122302158273 + estimated_peak_memory_range: + min: 16384 + max: 29775600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 100 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 100 + job_id: jprv4x6vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 382.0 + throughput: 2617.801047120419 + estimated_peak_memory_range: + min: 458752 + max: 14938352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -844,7 +846,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jg9lyndmg + job_id: jp4lxenq5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -853,10 +855,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:50:47Z' + timestamp: '2024-11-09T22:35:52Z' - torchscript_onnx_qnn: - inference_time: 378.0 - throughput: 2645.5026455026455 + inference_time: 397.0 + 
throughput: 2518.891687657431 estimated_peak_memory_range: min: 442368 max: 442368 @@ -867,14 +869,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j56yz4wnp + job_id: jgo21dz4p job_status: Passed torchscript_onnx: - inference_time: 512.0 - throughput: 1953.125 + inference_time: 515.0 + throughput: 1941.7475728155339 estimated_peak_memory_range: - min: 3821568 - max: 3821568 + min: 2887680 + max: 2887680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -882,7 +884,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpy1j3m0p + job_id: jp8q3j0zp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -891,4 +893,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:50:56Z' + timestamp: '2024-11-09T22:36:00Z' diff --git a/qai_hub_models/models/mediapipe_face_quantized/README.md b/qai_hub_models/models/mediapipe_face_quantized/README.md index f25ccafb..62b49410 100644 --- a/qai_hub_models/models/mediapipe_face_quantized/README.md +++ b/qai_hub_models/models/mediapipe_face_quantized/README.md @@ -5,8 +5,7 @@ Designed for sub-millisecond processing, this model predicts bounding boxes and pose skeletons (left eye, right eye, nose tip, mouth, left eye tragion, and right eye tragion) of faces in an image. -This is based on the implementation of MediaPipe-Face-Detection-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_face_quantized). 
diff --git a/qai_hub_models/models/mediapipe_face_quantized/perf.yaml b/qai_hub_models/models/mediapipe_face_quantized/perf.yaml index 53a51100..d3653adc 100644 --- a/qai_hub_models/models/mediapipe_face_quantized/perf.yaml +++ b/qai_hub_models/models/mediapipe_face_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 270.0 - throughput: 3703.703703703704 + inference_time: 276.0 + throughput: 3623.1884057971015 estimated_peak_memory_range: min: 12288 - max: 1518752 + max: 1375512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp14wzq7p + job_id: jp2k7r66p job_status: Passed torchscript_onnx_qnn: inference_time: 302.0 throughput: 3311.2582781456954 estimated_peak_memory_range: - min: 16384 - max: 11584480 + min: 221184 + max: 11594152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -76,30 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp14wz37p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:49:22Z' - - torchscript_onnx_tflite: - inference_time: 274.0 - throughput: 3649.6350364963505 - estimated_peak_memory_range: - min: 12288 - max: 3322096 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 121 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 121 - job_id: j57ylrv95 + job_id: jgn69z8j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -108,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:49:24Z' + timestamp: '2024-11-09T22:34:28Z' - torchscript_onnx_tflite: - inference_time: 192.0 - throughput: 5208.333333333333 + inference_time: 181.0 + throughput: 5524.861878453039 
estimated_peak_memory_range: min: 12288 - max: 33796048 + max: 33438576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -122,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jpxk6oel5 + job_id: jp0z1m705 job_status: Passed torchscript_onnx_qnn: - inference_time: 205.0 - throughput: 4878.048780487805 + inference_time: 203.0 + throughput: 4926.108374384236 estimated_peak_memory_range: min: 204800 - max: 19190240 + max: 19699984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -137,7 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp14wz3np + job_id: jp2k7rn6p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:49:27Z' + timestamp: '2024-11-09T22:34:29Z' - torchscript_onnx_tflite: - inference_time: 201.0 - throughput: 4975.124378109453 + inference_time: 199.0 + throughput: 5025.125628140703 estimated_peak_memory_range: min: 8192 - max: 24398464 + max: 24289984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -160,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jpedwm2v5 + job_id: jgkel2mvg job_status: Passed torchscript_onnx_qnn: - inference_time: 213.0 - throughput: 4694.835680751174 + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: min: 0 - max: 15039600 + max: 14809488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp3j10xmg + job_id: jp0z1o005 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -184,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:49:43Z' + timestamp: '2024-11-09T22:34:31Z' - torchscript_onnx_tflite: - inference_time: 678.0 - throughput: 1474.9262536873157 + 
inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: min: 12288 - max: 26341904 + max: 25635600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jgn6mvrq5 + job_id: jglv0yr25 job_status: Passed torchscript_onnx_qnn: - inference_time: 734.0 - throughput: 1362.3978201634877 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 12288 - max: 8031936 + min: 217088 + max: 8481568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: j57ylr6n5 + job_id: jgkel6xvg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:49:29Z' + timestamp: '2024-11-09T22:34:33Z' - torchscript_onnx_tflite: - inference_time: 5021.0 - throughput: 199.16351324437363 + inference_time: 4967.0 + throughput: 201.32876988121603 estimated_peak_memory_range: - min: 32768 - max: 1863392 + min: 49152 + max: 5403240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp2k9y3qp + job_id: jp3j4z2mg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:49:07Z' + timestamp: '2024-11-09T22:34:15Z' - torchscript_onnx_tflite: - inference_time: 274.0 - throughput: 3649.6350364963505 + inference_time: 269.0 + throughput: 3717.472118959108 estimated_peak_memory_range: min: 12288 - max: 1383712 + max: 1252544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -259,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp0z20en5 + job_id: jpv61lxz5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 297.0 - throughput: 3367.003367003367 + inference_time: 301.0 + throughput: 3322.2591362126245 estimated_peak_memory_range: - min: 233472 - max: 1899912 + min: 229376 + max: 1423016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jpxk6om85 + job_id: jglv0wm25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -283,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:49:31Z' + timestamp: '2024-11-09T22:34:35Z' - torchscript_onnx_tflite: - inference_time: 271.0 - throughput: 3690.036900369004 + inference_time: 273.0 + throughput: 3663.003663003663 estimated_peak_memory_range: - min: 32768 - max: 18406048 + min: 12288 + max: 2117584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: j57ylwyv5 + job_id: jpedr7385 job_status: Passed torchscript_onnx_qnn: - inference_time: 302.0 - throughput: 3311.2582781456954 + inference_time: 303.0 + throughput: 3300.3300330033003 estimated_peak_memory_range: min: 221184 - max: 1577240 + max: 1785400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp0z2qz95 + job_id: jpv612kz5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -321,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:29:00Z' + timestamp: '2024-11-09T22:34:40Z' - torchscript_onnx_tflite: - inference_time: 269.0 - throughput: 3717.472118959108 + inference_time: 275.0 + throughput: 3636.3636363636365 estimated_peak_memory_range: - min: 12288 - max: 1351520 + min: 16384 + max: 1373840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 121 - job_id: jpxk6jk35 + job_id: j5wedln45 job_status: Passed torchscript_onnx_qnn: - inference_time: 298.0 - throughput: 3355.7046979865772 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: - min: 229376 - max: 1898472 + min: 225280 + max: 1716424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -350,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jgkeqnewg + job_id: jpedr6m85 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -359,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:29:02Z' + timestamp: '2024-11-09T22:34:41Z' - torchscript_onnx_tflite: - inference_time: 269.0 - throughput: 3717.472118959108 + inference_time: 279.0 + throughput: 3584.2293906810037 estimated_peak_memory_range: min: 12288 - max: 1337560 + max: 1540336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -373,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp2k96krp + job_id: jp14dnxnp job_status: Passed torchscript_onnx_qnn: inference_time: 302.0 throughput: 3311.2582781456954 estimated_peak_memory_range: - min: 233472 - max: 1532832 + min: 217088 + max: 1871136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -388,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp3j13j3g + job_id: j5wedy645 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -397,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:29:05Z' + timestamp: '2024-11-09T22:34:43Z' - torchscript_onnx_tflite: - inference_time: 655.0 - throughput: 1526.7175572519084 + inference_time: 648.0 + throughput: 1543.20987654321 estimated_peak_memory_range: min: 12288 - max: 23146176 + max: 23584736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -411,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 121 - job_id: jgn6my6k5 + job_id: j57yje3n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 747.0 - throughput: 1338.6880856760374 + inference_time: 771.0 + throughput: 1297.0168612191958 estimated_peak_memory_range: min: 212992 - max: 6159296 + max: 6166560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -426,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jglv2zvj5 + job_id: jp14doznp job_status: Passed reference_device_info: name: SA8295P ADP @@ -435,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:29:04Z' + timestamp: '2024-11-09T22:34:45Z' - torchscript_onnx_tflite: - inference_time: 331.0 - throughput: 3021.1480362537764 + inference_time: 335.0 + throughput: 2985.0746268656717 estimated_peak_memory_range: min: 12288 - max: 35132864 + max: 35361520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -449,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jpv6rk4r5 + job_id: jpxk7l285 job_status: Passed torchscript_onnx_qnn: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 358.0 + throughput: 2793.2960893854747 estimated_peak_memory_range: min: 208896 - max: 20738496 + max: 19876576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -464,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jglv2mo25 + job_id: j5wedy6z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -473,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:49:41Z' + timestamp: '2024-11-09T22:34:48Z' - torchscript_onnx_qnn: - inference_time: 410.0 - throughput: 2439.0243902439024 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: - min: 569344 - max: 569344 + min: 479232 + max: 479232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -487,7 +464,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jgn6mvxj5 + job_id: jp3j4o0mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -496,53 +473,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:49:32Z' + timestamp: '2024-11-09T22:34:37Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 179.0 - throughput: 5586.592178770949 + inference_time: 186.0 + throughput: 5376.344086021505 estimated_peak_memory_range: - min: 0 - max: 1367096 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 117 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 117 - job_id: jgdxq17zp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 216.0 - throughput: 4629.62962962963 - estimated_peak_memory_range: - min: 143360 - max: 10106584 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 112 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 112 - job_id: jgdxq10zp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:49:23Z' - - torchscript_onnx_tflite: - inference_time: 171.0 - throughput: 5847.953216374269 - estimated_peak_memory_range: - min: 12288 - max: 1552952 + min: 16384 + max: 34952064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -550,14 +489,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp4ldrj15 + job_id: jpy14ow0p job_status: Passed torchscript_onnx_qnn: - inference_time: 223.0 - throughput: 4484.304932735426 + inference_time: 224.0 + throughput: 4464.285714285715 estimated_peak_memory_range: - min: 126976 - max: 3374088 + min: 131072 + max: 3724976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -565,7 +504,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 112 - job_id: jg9lyn8mg + job_id: jprv4ljkg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -574,13 +513,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:49:25Z' + timestamp: '2024-11-09T22:34:28Z' - torchscript_onnx_tflite: - inference_time: 136.0 - throughput: 7352.941176470588 + inference_time: 129.0 + throughput: 7751.937984496124 estimated_peak_memory_range: min: 12288 - max: 27323872 + max: 27965472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -588,14 +527,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j5mn6xv9p + job_id: jp8q3evqp job_status: Passed torchscript_onnx_qnn: - inference_time: 164.0 - throughput: 6097.5609756097565 + inference_time: 162.0 + throughput: 6172.83950617284 estimated_peak_memory_range: min: 0 - max: 13711792 + max: 14479440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -603,7 +542,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgdxq106p + job_id: jpy14o00p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -612,13 +551,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:49:27Z' + timestamp: '2024-11-09T22:34:30Z' - torchscript_onnx_tflite: - inference_time: 141.0 - throughput: 7092.198581560284 + inference_time: 118.0 + throughput: 8474.57627118644 estimated_peak_memory_range: min: 8192 - max: 19116960 + max: 18683056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -626,14 +565,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jgz3jdwx5 + job_id: jglv0j885 job_status: Passed torchscript_onnx_qnn: - inference_time: 183.0 - throughput: 5464.48087431694 + inference_time: 143.0 + throughput: 6993.006993006993 estimated_peak_memory_range: min: 0 - max: 10235776 + max: 10506736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -641,7 +580,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgo2n6o1p + job_id: jp8q3jyqp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -650,13 +589,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:49:43Z' + timestamp: '2024-11-11T14:16:56Z' - torchscript_onnx_tflite: - inference_time: 396.0 - throughput: 2525.252525252525 + inference_time: 398.0 + throughput: 2512.5628140703516 estimated_peak_memory_range: - min: 24576 - max: 19995680 + min: 12288 + max: 19462544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -664,14 +603,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jprv2317g + job_id: j56y38lnp job_status: Passed torchscript_onnx_qnn: - inference_time: 494.0 - throughput: 2024.2914979757086 + inference_time: 502.0 + throughput: 1992.03187250996 estimated_peak_memory_range: - min: 12288 - max: 8126496 + min: 126976 + max: 8121760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -679,7 +618,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jp4ldr825 + job_id: j5q674qep job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -688,13 +627,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:49:29Z' + timestamp: '2024-11-09T22:34:34Z' - torchscript_onnx_tflite: - inference_time: 2893.0 - throughput: 345.66194262011754 + inference_time: 2958.0 + throughput: 338.0662609871535 estimated_peak_memory_range: - min: 16384 - max: 7851360 + min: 139264 + max: 6806144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -702,7 +641,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jpy1j3vlp + job_id: jgo21lq1p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -711,13 +650,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:49:08Z' + timestamp: 
'2024-11-09T22:34:16Z' - torchscript_onnx_tflite: - inference_time: 188.0 - throughput: 5319.148936170212 + inference_time: 180.0 + throughput: 5555.555555555556 estimated_peak_memory_range: min: 12288 - max: 2665344 + max: 1250784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -725,14 +664,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp8qmywop + job_id: jgjv0r41g job_status: Passed torchscript_onnx_qnn: - inference_time: 217.0 - throughput: 4608.294930875576 + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: - min: 139264 - max: 1761552 + min: 143360 + max: 1679992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -740,7 +679,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j5mn6x47p + job_id: j56y3o4np job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -749,13 +688,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:49:31Z' + timestamp: '2024-11-09T22:34:36Z' - torchscript_onnx_tflite: - inference_time: 176.0 - throughput: 5681.818181818182 + inference_time: 184.0 + throughput: 5434.782608695652 estimated_peak_memory_range: min: 12288 - max: 1412072 + max: 3237360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -763,14 +702,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp4ldol85 + job_id: jgz3xlk45 job_status: Passed torchscript_onnx_qnn: - inference_time: 218.0 - throughput: 4587.155963302752 + inference_time: 215.0 + throughput: 4651.162790697675 estimated_peak_memory_range: - min: 143360 - max: 1495232 + min: 188416 + max: 1579832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -778,7 +717,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jp8qm9qkp + job_id: jgjv03n1g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -787,13 +726,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P 
Proxy - timestamp: '2024-10-30T01:29:00Z' + timestamp: '2024-11-09T22:34:40Z' - torchscript_onnx_tflite: - inference_time: 179.0 - throughput: 5586.592178770949 + inference_time: 176.0 + throughput: 5681.818181818182 estimated_peak_memory_range: - min: 12288 - max: 1532008 + min: 20480 + max: 1498752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -801,14 +740,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j5mn62ndp + job_id: jg9l3zemg job_status: Passed torchscript_onnx_qnn: - inference_time: 220.0 - throughput: 4545.454545454545 + inference_time: 222.0 + throughput: 4504.504504504504 estimated_peak_memory_range: - min: 135168 - max: 1476016 + min: 139264 + max: 1432352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -816,7 +755,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j5q6rk6np + job_id: jgz3xzd45 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -825,13 +764,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:29:02Z' + timestamp: '2024-11-09T22:34:42Z' - torchscript_onnx_tflite: - inference_time: 181.0 - throughput: 5524.861878453039 + inference_time: 183.0 + throughput: 5464.48087431694 estimated_peak_memory_range: min: 12288 - max: 1332928 + max: 71046024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -839,14 +778,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jpy1jw18p + job_id: jgdxrdl6p job_status: Passed torchscript_onnx_qnn: - inference_time: 224.0 - throughput: 4464.285714285715 + inference_time: 220.0 + throughput: 4545.454545454545 estimated_peak_memory_range: - min: 147456 - max: 1498008 + min: 139264 + max: 1500992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -854,7 +793,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgo2n02qp + job_id: jg9l3onmg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -863,13 
+802,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:29:06Z' + timestamp: '2024-11-09T22:34:44Z' - torchscript_onnx_tflite: - inference_time: 484.0 - throughput: 2066.115702479339 + inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: min: 12288 - max: 18411888 + max: 18151856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -877,14 +816,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jprv2qv0g + job_id: jp4lxy025 job_status: Passed torchscript_onnx_qnn: - inference_time: 611.0 - throughput: 1636.6612111292961 + inference_time: 624.0 + throughput: 1602.5641025641025 estimated_peak_memory_range: min: 0 - max: 5956288 + max: 5887424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -892,7 +831,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j56yzjy6p + job_id: jgdxr616p job_status: Passed reference_device_info: name: SA8295P ADP @@ -901,13 +840,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:29:04Z' + timestamp: '2024-11-09T22:34:46Z' - torchscript_onnx_tflite: - inference_time: 214.0 - throughput: 4672.897196261682 + inference_time: 217.0 + throughput: 4608.294930875576 estimated_peak_memory_range: min: 12288 - max: 28620048 + max: 28940656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -915,14 +854,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jgjv2n1eg + job_id: j5mnw0y7p job_status: Passed torchscript_onnx_qnn: - inference_time: 253.0 - throughput: 3952.5691699604745 + inference_time: 255.0 + throughput: 3921.5686274509803 estimated_peak_memory_range: - min: 0 - max: 15108768 + min: 126976 + max: 15913872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -930,7 +869,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j56yz4rnp + job_id: jg9l3onqg job_status: Passed reference_device_info: 
name: QCS8450 (Proxy) @@ -939,13 +878,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:49:41Z' + timestamp: '2024-11-09T22:34:48Z' - torchscript_onnx_qnn: - inference_time: 312.0 - throughput: 3205.128205128205 + inference_time: 318.0 + throughput: 3144.6540880503144 estimated_peak_memory_range: - min: 569344 - max: 569344 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -953,7 +892,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jprv239kg + job_id: jgo21d61p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -962,4 +901,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:49:33Z' + timestamp: '2024-11-09T22:34:38Z' diff --git a/qai_hub_models/models/mediapipe_hand/README.md b/qai_hub_models/models/mediapipe_hand/README.md index b5e9c832..4872c7f2 100644 --- a/qai_hub_models/models/mediapipe_hand/README.md +++ b/qai_hub_models/models/mediapipe_hand/README.md @@ -5,8 +5,7 @@ The MediaPipe Hand Landmark Detector is a machine learning pipeline that predicts bounding boxes and pose skeletons of hands in an image. -This is based on the implementation of MediaPipe-Hand-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_hand). 
diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 18199f28..f2fa6e77 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -238,10 +238,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, - components=ALL_COMPONENTS, - supports_qnn=False, - supports_precompiled_qnn_onnx=False, + model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 80111db2..957d1780 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 702.0 - throughput: 1424.5014245014245 - estimated_peak_memory_range: - min: 32768 - max: 10279536 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 149 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 149 - job_id: jpv6rkwr5 - job_status: Passed - torchscript_onnx: - inference_time: 1178.0 - throughput: 848.8964346349745 - estimated_peak_memory_range: - min: 12288 - max: 5923504 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 196 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 196 - job_id: jp2k9y7qp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-26T23:48:22Z' - - torchscript_onnx_tflite: - inference_time: 708.0 - throughput: 1412.4293785310736 + inference_time: 709.0 + throughput: 1410.4372355430182 estimated_peak_memory_range: - min: 28672 - max: 1467192 + min: 45056 + max: 8275376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpedwmvv5 + job_id: jpxk7lw85 job_status: Passed torchscript_onnx: - inference_time: 1164.0 - throughput: 859.106529209622 + inference_time: 1182.0 + throughput: 846.0236886632825 estimated_peak_memory_range: - min: 12288 - max: 6182168 + min: 16384 + max: 57035456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jp0z201n5 + job_id: jp3j4z3mg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:48:23Z' + timestamp: '2024-11-09T22:33:28Z' - torchscript_onnx_tflite: - inference_time: 573.0 - throughput: 1745.2006980802792 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: min: 16384 - max: 62039856 + max: 61592144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5we369m5 + job_id: jgn69zjj5 job_status: Passed torchscript_onnx: - inference_time: 990.0 - throughput: 1010.10101010101 + inference_time: 853.0 + throughput: 1172.3329425556858 estimated_peak_memory_range: - min: 331776 - max: 71149168 + min: 421888 + max: 70300480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jgkeqxlng + job_id: jpv61loz5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: 
Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:48:26Z' + timestamp: '2024-11-09T22:33:30Z' - torchscript_onnx_tflite: - inference_time: 431.0 - throughput: 2320.185614849188 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: min: 8192 - max: 29442864 + max: 29029056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp0z20rn5 + job_id: jp2k7r26p job_status: Passed torchscript_onnx: - inference_time: 878.0 - throughput: 1138.9521640091116 + inference_time: 876.0 + throughput: 1141.552511415525 estimated_peak_memory_range: min: 0 - max: 34111008 + max: 33622128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jp3j10qng + job_id: jpedr7185 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:48:29Z' + timestamp: '2024-11-09T22:33:32Z' - torchscript_onnx_tflite: - inference_time: 705.0 - throughput: 1418.4397163120568 + inference_time: 703.0 + throughput: 1422.475106685633 estimated_peak_memory_range: - min: 12288 - max: 3430224 + min: 20480 + max: 3686024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp14wz87p + job_id: jp0z1my05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:47:50Z' + timestamp: '2024-11-09T22:33:00Z' - torchscript_onnx_tflite: - inference_time: 709.0 - throughput: 1410.4372355430182 + inference_time: 705.0 + throughput: 1418.4397163120568 estimated_peak_memory_range: - min: 12288 - max: 1368056 + min: 49152 + max: 4127536 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpy1jw38p + job_id: jgkel2zvg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:28:15Z' + timestamp: '2024-11-09T22:33:02Z' - torchscript_onnx_tflite: inference_time: 704.0 throughput: 1420.4545454545455 estimated_peak_memory_range: - min: 24576 - max: 1443480 + min: 12288 + max: 3177072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp8qm9ykp + job_id: jglv0yn25 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:28:17Z' + timestamp: '2024-11-09T22:33:04Z' - torchscript_onnx_tflite: - inference_time: 704.0 - throughput: 1420.4545454545455 + inference_time: 712.0 + throughput: 1404.4943820224719 estimated_peak_memory_range: - min: 24576 - max: 4510912 + min: 16384 + max: 3205880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jglv2zmj5 + job_id: jp3j4zkmg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:28:19Z' + timestamp: '2024-11-09T22:33:05Z' - torchscript_onnx_tflite: - inference_time: 1282.0 - throughput: 780.0312012480499 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: min: 12288 - max: 54964640 + max: 23213376 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jpv61l3z5 + job_status: Passed + 
reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:33:07Z' + - torchscript_onnx_tflite: + inference_time: 1290.0 + throughput: 775.1937984496124 + estimated_peak_memory_range: + min: 16384 + max: 54762208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp2k9ymqp + job_id: jpedr7985 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:47:57Z' + timestamp: '2024-11-09T22:33:09Z' - torchscript_onnx: - inference_time: 1215.0 - throughput: 823.0452674897119 + inference_time: 1210.0 + throughput: 826.4462809917355 estimated_peak_memory_range: - min: 5664768 - max: 5664768 + min: 4370432 + max: 4370432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jglv2mem5 + job_id: j5wedlv45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,53 +316,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:48:27Z' + timestamp: '2024-11-09T22:33:34Z' - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1045.0 - throughput: 956.9377990430622 - estimated_peak_memory_range: - min: 12288 - max: 9752976 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 158 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 158 - job_id: jgjv2nleg - job_status: Passed - torchscript_onnx: - inference_time: 1618.0 - throughput: 618.0469715698393 - estimated_peak_memory_range: - min: 12288 - max: 8126296 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 209 - layers_on_gpu: 0 - layers_on_cpu: 0 - 
total_layers: 209 - job_id: jpy1j34lp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:48:22Z' - - torchscript_onnx_tflite: - inference_time: 1034.0 - throughput: 967.1179883945841 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 12288 - max: 1470312 + min: 61440 + max: 1520728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -383,14 +332,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgz3jd7x5 + job_id: j5mnw0j7p job_status: Passed torchscript_onnx: - inference_time: 1532.0 - throughput: 652.7415143603133 + inference_time: 1537.0 + throughput: 650.6180871828237 estimated_peak_memory_range: - min: 12288 - max: 8072232 + min: 16384 + max: 8079800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -398,7 +347,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jp8qmy3op + job_id: jgo21l01p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -407,13 +356,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:48:24Z' + timestamp: '2024-11-09T22:33:29Z' - torchscript_onnx_tflite: - inference_time: 908.0 - throughput: 1101.3215859030836 + inference_time: 750.0 + throughput: 1333.3333333333333 estimated_peak_memory_range: - min: 12288 - max: 64925152 + min: 16384 + max: 64964736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -421,14 +370,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jg9lyn48g + job_id: jprv4lzkg job_status: Passed torchscript_onnx: - inference_time: 1239.0 - throughput: 807.1025020177563 + inference_time: 1150.0 + throughput: 869.5652173913044 estimated_peak_memory_range: - min: 499712 - max: 68428848 + min: 0 + max: 68039088 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-436,7 +385,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j5q6rq7op + job_id: jgjv0rm1g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -445,13 +394,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:48:26Z' + timestamp: '2024-11-09T22:33:31Z' - torchscript_onnx_tflite: inference_time: 692.0 throughput: 1445.086705202312 estimated_peak_memory_range: min: 8192 - max: 33384192 + max: 32965680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -459,14 +408,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp8qmy7op + job_id: jpy14o90p job_status: Passed torchscript_onnx: - inference_time: 1072.0 - throughput: 932.8358208955224 + inference_time: 1067.0 + throughput: 937.207122774133 estimated_peak_memory_range: min: 0 - max: 39055024 + max: 38867072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -474,7 +423,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jgo2n6ekp + job_id: jgz3xl945 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -483,13 +432,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:48:29Z' + timestamp: '2024-11-09T22:33:33Z' - torchscript_onnx_tflite: - inference_time: 1018.0 - throughput: 982.3182711198428 + inference_time: 1005.0 + throughput: 995.0248756218906 estimated_peak_memory_range: - min: 12288 - max: 2440888 + min: 20480 + max: 1513440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -497,7 +446,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgdxq1vzp + job_id: jp8q3eoqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -506,13 +455,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:47:50Z' + timestamp: '2024-11-09T22:33:01Z' - torchscript_onnx_tflite: - inference_time: 1013.0 - 
throughput: 987.1668311944719 + inference_time: 1007.0 + throughput: 993.0486593843099 estimated_peak_memory_range: - min: 16384 - max: 1590224 + min: 12288 + max: 2690624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -520,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp0z2q095 + job_id: j5q67l8ep job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -529,13 +478,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:28:16Z' + timestamp: '2024-11-09T22:33:02Z' - torchscript_onnx_tflite: - inference_time: 1028.0 - throughput: 972.7626459143969 + inference_time: 1032.0 + throughput: 968.9922480620155 estimated_peak_memory_range: min: 12288 - max: 2520624 + max: 1616232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -543,7 +492,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgkeqnxwg + job_id: j56y386np job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -552,13 +501,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:28:17Z' + timestamp: '2024-11-09T22:33:04Z' - torchscript_onnx_tflite: - inference_time: 1009.0 - throughput: 991.0802775024777 + inference_time: 1061.0 + throughput: 942.5070688030161 estimated_peak_memory_range: - min: 45056 - max: 20290400 + min: 16384 + max: 1615800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -566,7 +515,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j56yzj46p + job_id: jgo21ly1p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -575,13 +524,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:28:19Z' + timestamp: '2024-11-09T22:33:06Z' - torchscript_onnx_tflite: - inference_time: 2574.0 - throughput: 388.5003885003885 + inference_time: 4528.0 + throughput: 220.84805653710248 estimated_peak_memory_range: - min: 16384 
- max: 59153216 + min: 45056 + max: 37067424 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 158 + layers_on_cpu: 0 + total_layers: 158 + job_id: jgjv0rx1g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:33:08Z' + - torchscript_onnx_tflite: + inference_time: 2635.0 + throughput: 379.5066413662239 + estimated_peak_memory_range: + min: 12288 + max: 57811856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -589,7 +561,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpy1j3dlp + job_id: jgz3xle45 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -598,13 +570,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:47:57Z' + timestamp: '2024-11-09T22:33:10Z' - torchscript_onnx: - inference_time: 1649.0 - throughput: 606.4281382656155 + inference_time: 1650.0 + throughput: 606.060606060606 estimated_peak_memory_range: - min: 7933952 - max: 7933952 + min: 8060928 + max: 8060928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -612,7 +584,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j56yz4qyp + job_id: jg9l3z1mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -621,4 +593,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:48:28Z' + timestamp: '2024-11-09T22:33:34Z' diff --git a/qai_hub_models/models/mediapipe_pose/README.md b/qai_hub_models/models/mediapipe_pose/README.md index 02ceb20e..9e194785 100644 --- a/qai_hub_models/models/mediapipe_pose/README.md +++ b/qai_hub_models/models/mediapipe_pose/README.md @@ -5,8 +5,7 @@ The MediaPipe Pose Landmark Detector is a machine learning pipeline that predicts bounding boxes and pose skeletons of poses in an image. 
-This is based on the implementation of MediaPipe-Pose-Estimation found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_pose). diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index fcd19469..a82beefd 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -238,10 +238,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, - components=ALL_COMPONENTS, - supports_qnn=False, - supports_precompiled_qnn_onnx=False, + model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index b7fc99f8..c4cd3d53 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,33 +36,56 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MediaPipePoseDetector performance_metrics: - - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:47:15Z' - - reference_device_info: + - torchscript_onnx_tflite: + inference_time: 780.0 + throughput: 1282.051282051282 + estimated_peak_memory_range: + min: 16384 + max: 12040440 + primary_compute_unit: NPU + precision: fp16 + 
layer_info: + layers_on_npu: 106 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 106 + job_id: jpxk7l8l5 + job_status: Passed + torchscript_onnx: + inference_time: 1010.0 + throughput: 990.0990099009902 + estimated_peak_memory_range: + min: 16384 + max: 3957816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 139 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 139 + job_id: jp3j4zyng + job_status: Passed + reference_device_info: name: Samsung Galaxy S23 os: '13' form_factor: Phone os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:47:16Z' + timestamp: '2024-11-09T22:32:24Z' - torchscript_onnx_tflite: - inference_time: 666.0 - throughput: 1501.5015015015015 + inference_time: 560.0 + throughput: 1785.7142857142858 estimated_peak_memory_range: - min: 65536 - max: 48932144 + min: 12288 + max: 48446688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -69,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpedwmwo5 + job_id: jgn69zdq5 job_status: Passed torchscript_onnx: - inference_time: 856.0 - throughput: 1168.2242990654206 + inference_time: 734.0 + throughput: 1362.3978201634877 estimated_peak_memory_range: min: 0 - max: 52316800 + max: 51581040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -84,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jgn6mv3q5 + job_id: jpv61ljr5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -93,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:47:18Z' + timestamp: '2024-11-09T22:32:26Z' - torchscript_onnx_tflite: - inference_time: 560.0 - throughput: 1785.7142857142858 + inference_time: 563.0 + throughput: 1776.1989342806394 estimated_peak_memory_range: min: 12288 - max: 24819136 + max: 24686112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -107,14 +131,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp2k9y9rp + job_id: jp2k7rqqp job_status: Passed torchscript_onnx: - inference_time: 751.0 - throughput: 1331.5579227696405 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: min: 0 - max: 26806480 + max: 26803136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -122,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jp0z20ln5 + job_id: jpedr7jv5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -131,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:47:22Z' + timestamp: '2024-11-09T22:32:28Z' - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 16384 - max: 1310016 + min: 12288 + max: 4893352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -145,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j5we36335 + job_id: jp0z1m8n5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:46:43Z' + timestamp: '2024-11-09T22:31:55Z' - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 774.0 + throughput: 1291.9896640826873 estimated_peak_memory_range: - min: 28672 - max: 1469472 + min: 0 + max: 1366592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -168,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp3j1k23g + job_id: jgkel2wng job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -177,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:27:46Z' + timestamp: '2024-11-09T22:31:57Z' - 
torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 776.0 + throughput: 1288.659793814433 estimated_peak_memory_range: - min: 0 - max: 8042264 + min: 28672 + max: 1499992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -191,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpv6rokk5 + job_id: jglv0ydm5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -200,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:27:48Z' + timestamp: '2024-11-09T22:31:59Z' - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: min: 28672 - max: 1428032 + max: 1488832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -214,7 +238,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpedw1mo5 + job_id: jp3j4zdng job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -223,13 +247,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:27:49Z' + timestamp: '2024-11-09T22:32:01Z' - torchscript_onnx_tflite: - inference_time: 1901.0 - throughput: 526.0389268805892 + inference_time: 2347.0 + throughput: 426.075841499787 estimated_peak_memory_range: min: 61440 - max: 43361312 + max: 20936112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -237,7 +261,30 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgn6mvmk5 + job_id: jpv61l8r5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:32:03Z' + - torchscript_onnx_tflite: + inference_time: 1902.0 + throughput: 525.7623554153523 + estimated_peak_memory_range: + min: 61440 + max: 42927712 + primary_compute_unit: NPU + 
precision: fp16 + layer_info: + layers_on_npu: 106 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 106 + job_id: jpedr7qv5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -246,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:46:50Z' + timestamp: '2024-11-09T22:32:04Z' - torchscript_onnx: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 1062.0 + throughput: 941.6195856873823 estimated_peak_memory_range: - min: 4292608 - max: 4292608 + min: 2957312 + max: 2957312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -260,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jp2k9ylqp + job_id: j5wedljm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -269,38 +316,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:47:20Z' + timestamp: '2024-11-09T22:32:29Z' - name: MediaPipePoseLandmarkDetector performance_metrics: - - torchscript_onnx: - inference_time: 1338.0 - throughput: 747.3841554559043 - estimated_peak_memory_range: - min: 12288 - max: 45697064 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 291 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 291 - job_id: jp4ldrv15 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:47:15Z' - torchscript_onnx_tflite: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 819.0 + throughput: 1221.001221001221 estimated_peak_memory_range: - min: 16384 - max: 32783280 + min: 53248 + max: 2038872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,14 +332,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgjv2n2vg + job_id: j5mnw019p job_status: Passed 
torchscript_onnx: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1321.0 + throughput: 757.002271006813 estimated_peak_memory_range: - min: 16384 - max: 15667920 + min: 12288 + max: 9394240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -323,7 +347,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: j5mn6x39p + job_id: jgo21ljkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -332,13 +356,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:47:17Z' + timestamp: '2024-11-09T22:32:25Z' - torchscript_onnx_tflite: - inference_time: 636.0 - throughput: 1572.3270440251572 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: - min: 16384 - max: 94705952 + min: 12288 + max: 94701632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,14 +370,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgz3jdjo5 + job_id: jprv4lm7g job_status: Passed torchscript_onnx: - inference_time: 1054.0 - throughput: 948.7666034155598 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: - min: 0 - max: 101390912 + min: 139264 + max: 99661328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -361,7 +385,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jprv23e7g + job_id: jgjv0rjeg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -370,13 +394,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:47:19Z' + timestamp: '2024-11-09T22:32:26Z' - torchscript_onnx_tflite: - inference_time: 547.0 - throughput: 1828.1535648994516 + inference_time: 469.0 + throughput: 2132.1961620469083 estimated_peak_memory_range: min: 8192 - max: 36986592 + max: 36329120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -384,14 +408,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 219 - job_id: jpy1j3j8p + job_id: jpy14oklp job_status: Passed torchscript_onnx: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 915.0 + throughput: 1092.896174863388 estimated_peak_memory_range: min: 0 - max: 44852208 + max: 43973312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -399,7 +423,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jp8qmyzop + job_id: jgz3xl1x5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -408,13 +432,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:47:22Z' + timestamp: '2024-11-09T22:32:28Z' - torchscript_onnx_tflite: - inference_time: 804.0 - throughput: 1243.7810945273632 + inference_time: 802.0 + throughput: 1246.8827930174564 estimated_peak_memory_range: - min: 32768 - max: 1754480 + min: 16384 + max: 1534536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -422,7 +446,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jg9lynywg + job_id: jp8q3edop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -431,13 +455,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:46:43Z' + timestamp: '2024-11-09T22:31:55Z' - torchscript_onnx_tflite: - inference_time: 797.0 - throughput: 1254.7051442910915 + inference_time: 824.0 + throughput: 1213.5922330097087 estimated_peak_memory_range: min: 12288 - max: 3596520 + max: 1658144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -445,7 +469,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgo2nyqqp + job_id: j5q67lxop job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -454,13 +478,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:27:47Z' + timestamp: '2024-11-09T22:31:57Z' - torchscript_onnx_tflite: - inference_time: 832.0 - throughput: 
1201.923076923077 + inference_time: 827.0 + throughput: 1209.1898428053205 estimated_peak_memory_range: - min: 24576 - max: 1586640 + min: 16384 + max: 2335352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -468,7 +492,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgjv2mnvg + job_id: j56y38xyp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -477,13 +501,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:27:48Z' + timestamp: '2024-11-09T22:31:59Z' - torchscript_onnx_tflite: - inference_time: 843.0 - throughput: 1186.2396204033214 + inference_time: 816.0 + throughput: 1225.4901960784314 estimated_peak_memory_range: - min: 12288 - max: 2245992 + min: 28672 + max: 5011768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -491,7 +515,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgz3j9do5 + job_id: jgo21lxkp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -500,13 +524,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:27:50Z' + timestamp: '2024-11-09T22:32:01Z' + - torchscript_onnx_tflite: + inference_time: 5116.0 + throughput: 195.46520719311962 + estimated_peak_memory_range: + min: 126976 + max: 41257792 + primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 219 + layers_on_cpu: 0 + total_layers: 219 + job_id: jgjv0r9eg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:32:03Z' - torchscript_onnx_tflite: - inference_time: 1731.0 - throughput: 577.7007510109763 + inference_time: 1799.0 + throughput: 555.864369093941 estimated_peak_memory_range: min: 12288 - max: 82573920 + max: 82500896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -514,7 +561,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jprv2320g + job_id: jgz3xl6x5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -523,12 +570,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:46:51Z' - - reference_device_info: + timestamp: '2024-11-09T22:32:05Z' + - torchscript_onnx: + inference_time: 1405.0 + throughput: 711.7437722419929 + estimated_peak_memory_range: + min: 8007680 + max: 8007680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 291 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 291 + job_id: jg9l3z68g + job_status: Passed + reference_device_info: name: Snapdragon X Elite CRD os: '11' form_factor: Compute os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:47:21Z' + timestamp: '2024-11-09T22:32:30Z' diff --git a/qai_hub_models/models/mediapipe_selfie/README.md b/qai_hub_models/models/mediapipe_selfie/README.md index e24f4220..577a80d2 100644 --- a/qai_hub_models/models/mediapipe_selfie/README.md +++ b/qai_hub_models/models/mediapipe_selfie/README.md @@ -5,8 +5,7 @@ Light-weight model that segments a person from the background in square or landscape selfie and video conference imagery. -This is based on the implementation of MediaPipe-Selfie-Segmentation found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mediapipe_selfie). 
diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 78579c60..0db1a226 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 701.0 - throughput: 1426.5335235378031 + inference_time: 696.0 + throughput: 1436.7816091954023 estimated_peak_memory_range: min: 12288 - max: 1688856 + max: 2208952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jglv30wm5 + job_id: jp14dnm7p job_status: Passed torchscript_onnx_qnn: - inference_time: 776.0 - throughput: 1288.659793814433 + inference_time: 773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: - min: 2109440 - max: 8302448 + min: 806912 + max: 7057216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j56yn3oyp + job_id: jpy14oylp job_status: Passed torchscript_onnx: - inference_time: 1303.0 - throughput: 767.4597083653108 + inference_time: 1296.0 + throughput: 771.604938271605 estimated_peak_memory_range: - min: 12288 - max: 5158480 + min: 352256 + max: 1960024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,45 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgdxor6zp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-31T14:44:44Z' - - torchscript_onnx_tflite: - inference_time: 701.0 - throughput: 1426.5335235378031 - estimated_peak_memory_range: - min: 20480 - max: 1741864 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 118 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 118 - job_id: jpy1j3z8p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 756.0 - throughput: 1322.7513227513227 - estimated_peak_memory_range: - min: 823296 - max: 2176648 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 138 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 138 - job_id: jp3je4ong + job_id: jgjv0r6eg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:44:35Z' + timestamp: '2024-11-09T22:31:24Z' - torchscript_onnx_tflite: - inference_time: 471.0 - throughput: 2123.1422505307855 + inference_time: 467.0 + throughput: 2141.3276231263385 estimated_peak_memory_range: - min: 12288 - max: 30494624 + min: 0 + max: 29358256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jp0z20495 + job_id: jgdxrdmzp job_status: Passed torchscript_onnx_qnn: - inference_time: 503.0 - throughput: 1988.0715705765408 + inference_time: 511.0 + throughput: 1956.9471624266146 estimated_peak_memory_range: - min: 0 - max: 8878608 + min: 802816 + max: 14855648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgo231dkp + job_id: jp0z1mxn5 job_status: Passed torchscript_onnx: - inference_time: 1103.0 - throughput: 906.6183136899365 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: min: 0 - max: 33931440 + max: 34246032 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-174,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j5mn6xqdp + job_id: jpedr70v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:44:36Z' + timestamp: '2024-11-09T22:31:25Z' - torchscript_onnx_tflite: - inference_time: 436.0 - throughput: 2293.577981651376 + inference_time: 434.0 + throughput: 2304.147465437788 estimated_peak_memory_range: - min: 12288 - max: 19306928 + min: 8192 + max: 19176432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jp3j10n3g + job_id: j57yje895 job_status: Passed torchscript_onnx_qnn: - inference_time: 431.0 - throughput: 2320.185614849188 + inference_time: 519.0 + throughput: 1926.7822736030828 estimated_peak_memory_range: min: 0 - max: 10351280 + max: 10884256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jpv6v12r5 + job_id: jp8q3ekop job_status: Passed torchscript_onnx: - inference_time: 695.0 - throughput: 1438.8489208633093 + inference_time: 872.0 + throughput: 1146.788990825688 estimated_peak_memory_range: min: 0 - max: 23714496 + max: 23223920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jprv2380g + job_id: jgz3xlqx5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:44:37Z' + timestamp: '2024-11-09T22:31:26Z' - torchscript_onnx_tflite: - inference_time: 697.0 - throughput: 1434.7202295552368 + inference_time: 696.0 + throughput: 1436.7816091954023 estimated_peak_memory_range: min: 12288 - max: 1605424 + max: 1531632 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jp8qmy2kp + job_id: jp4lxy215 job_status: Passed torchscript_onnx_qnn: - inference_time: 751.0 - throughput: 1331.5579227696405 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 811008 - max: 2122896 + min: 823296 + max: 2142552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgjve03eg + job_id: jgkel2kng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:44:38Z' + timestamp: '2024-11-09T22:31:16Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 696.0 + throughput: 1436.7816091954023 estimated_peak_memory_range: min: 28672 - max: 1571696 + max: 1868376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jprv2kj0g + job_id: jpxk7lzl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 760.0 - throughput: 1315.7894736842106 + inference_time: 763.0 + throughput: 1310.615989515072 estimated_peak_memory_range: - min: 823296 - max: 2180392 + min: 815104 + max: 2158320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp0z2y795 + job_id: jglv0y9m5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:27:26Z' + timestamp: '2024-11-09T22:31:18Z' - torchscript_onnx_tflite: - inference_time: 708.0 - throughput: 1412.4293785310736 + inference_time: 700.0 + throughput: 1428.5714285714287 
estimated_peak_memory_range: - min: 24576 - max: 1640376 + min: 16384 + max: 71297064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -326,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jp2k98nrp + job_id: j5mnw0l9p job_status: Passed torchscript_onnx_qnn: - inference_time: 765.0 - throughput: 1307.18954248366 + inference_time: 758.0 + throughput: 1319.2612137203166 estimated_peak_memory_range: - min: 815104 - max: 2122600 + min: 819200 + max: 2131016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp8qmovkp + job_id: j56y389yp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:27:27Z' + timestamp: '2024-11-09T22:31:19Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 699.0 + throughput: 1430.615164520744 estimated_peak_memory_range: min: 12288 - max: 4612240 + max: 1569136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jpy1je08p + job_id: jgn69zwq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 776.0 - throughput: 1288.659793814433 + inference_time: 756.0 + throughput: 1322.7513227513227 estimated_peak_memory_range: - min: 811008 - max: 2525544 + min: 32768 + max: 1384864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgkeqzmwg + job_id: jp3j4zlng job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:27:28Z' + timestamp: '2024-11-09T22:31:20Z' - torchscript_onnx_tflite: - inference_time: 934.0 - throughput: 
1070.6638115631692 + inference_time: 1305.0 + throughput: 766.2835249042146 estimated_peak_memory_range: min: 12288 - max: 30548544 + max: 18056160 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 118 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 118 + job_id: jprv4l77g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1454.0 + throughput: 687.757909215956 + estimated_peak_memory_range: + min: 0 + max: 5924880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jgo21l7kp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:31:22Z' + - torchscript_onnx_tflite: + inference_time: 924.0 + throughput: 1082.2510822510822 + estimated_peak_memory_range: + min: 16384 + max: 29474064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j56yz426p + job_id: jp2k7rzqp job_status: Passed torchscript_onnx_qnn: - inference_time: 993.0 - throughput: 1007.0493454179255 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: - min: 778240 - max: 17266688 + min: 802816 + max: 16682032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp14edo7p + job_id: jpv61lyr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:44:43Z' + timestamp: '2024-11-09T22:31:23Z' - torchscript_onnx_qnn: - inference_time: 905.0 - throughput: 1104.9723756906078 + inference_time: 906.0 + throughput: 1103.7527593818984 estimated_peak_memory_range: min: 786432 max: 786432 @@ -440,14 +442,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jpedkr6v5 + job_id: j5q67ldop job_status: Passed torchscript_onnx: - inference_time: 1374.0 - throughput: 727.802037845706 + inference_time: 1406.0 + throughput: 711.2375533428165 estimated_peak_memory_range: - min: 1982464 - max: 1982464 + min: 1970176 + max: 1970176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgn6mvlk5 + job_id: j5wedl0m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:44:39Z' + timestamp: '2024-11-09T22:31:27Z' diff --git a/qai_hub_models/models/midas/README.md b/qai_hub_models/models/midas/README.md index 2295b940..11d05c53 100644 --- a/qai_hub_models/models/midas/README.md +++ b/qai_hub_models/models/midas/README.md @@ -5,8 +5,7 @@ Midas is designed for estimating depth at each point in an image. -This is based on the implementation of Midas-V2 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/midas). 
diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml index 156774ae..500bbbd1 100644 --- a/qai_hub_models/models/midas/perf.yaml +++ b/qai_hub_models/models/midas/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Midas-V2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3267.0 - throughput: 306.09121518212424 + inference_time: 3227.0 + throughput: 309.88534242330337 estimated_peak_memory_range: - min: 20480 - max: 1762384 + min: 28672 + max: 9459208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j5weqdlm5 + job_id: jp14dn18p job_status: Passed torchscript_onnx_qnn: - inference_time: 3285.0 - throughput: 304.41400304414003 + inference_time: 3276.0 + throughput: 305.25030525030525 estimated_peak_memory_range: min: 16384 - max: 115668336 + max: 115969208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,52 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jg9lw3z8g + job_id: j5mnw0k9p job_status: Passed torchscript_onnx: - inference_time: 3288.0 - throughput: 304.1362530413625 + inference_time: 3295.0 + throughput: 303.49013657056145 estimated_peak_memory_range: - min: 12288 - max: 43208480 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 199 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 199 - job_id: jpy164olp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:44:18Z' - - torchscript_onnx_qnn: - inference_time: 3053.0 - throughput: 
327.54667540124467 - estimated_peak_memory_range: - min: 827392 - max: 2335616 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 197 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 197 - job_id: jp14edn7p - job_status: Passed - torchscript_onnx: - inference_time: 3310.0 - throughput: 302.11480362537765 - estimated_peak_memory_range: - min: 12288 - max: 43511792 + min: 16384 + max: 42782512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jp14wz28p + job_id: j56y380yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:44:10Z' + timestamp: '2024-11-09T22:30:42Z' - torchscript_onnx_tflite: - inference_time: 2428.0 - throughput: 411.8616144975288 + inference_time: 2270.0 + throughput: 440.52863436123346 estimated_peak_memory_range: - min: 12288 - max: 91704048 + min: 16384 + max: 93580832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgn6mv6k5 + job_id: jgdxrd4rp job_status: Passed torchscript_onnx_qnn: - inference_time: 2123.0 - throughput: 471.03155911446066 + inference_time: 2312.0 + throughput: 432.52595155709344 estimated_peak_memory_range: - min: 811008 - max: 9450944 + min: 802816 + max: 29858352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgdxordzp + job_id: jgn69zqq5 job_status: Passed torchscript_onnx: - inference_time: 2522.0 - throughput: 396.5107057890563 + inference_time: 2398.0 + throughput: 417.0141784820684 estimated_peak_memory_range: min: 0 - max: 95496896 + max: 96054976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 199 - job_id: jgdxq1nrp + job_id: jp3j4zrng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:44:10Z' + timestamp: '2024-11-09T22:30:43Z' - torchscript_onnx_tflite: - inference_time: 2096.0 - throughput: 477.09923664122135 + inference_time: 2133.0 + throughput: 468.8232536333802 estimated_peak_memory_range: min: 12288 - max: 40663360 + max: 40279088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgkeqxewg + job_id: j5wedlzm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2159.0 - throughput: 463.1773969430292 + inference_time: 2160.0 + throughput: 462.962962962963 estimated_peak_memory_range: min: 0 - max: 23781280 + max: 24089952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j57yxje95 + job_id: jprv4ld7g job_status: Passed torchscript_onnx: - inference_time: 2177.0 - throughput: 459.34772622875516 + inference_time: 2219.0 + throughput: 450.6534474988734 estimated_peak_memory_range: - min: 630784 - max: 44741264 + min: 0 + max: 44065552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jp4ldrn85 + job_id: jgo21l9kp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:44:11Z' + timestamp: '2024-11-09T22:30:44Z' - torchscript_onnx_tflite: - inference_time: 3214.0 - throughput: 311.13876789047913 + inference_time: 3217.0 + throughput: 310.8486167236556 estimated_peak_memory_range: - min: 16384 - max: 2859792 + min: 12288 + max: 2207664 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -250,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jprv23v0g + job_id: jg9l3z28g job_status: Passed torchscript_onnx_qnn: - inference_time: 3030.0 - throughput: 330.03300330033005 + inference_time: 3022.0 + throughput: 330.90668431502314 estimated_peak_memory_range: - min: 811008 - max: 2091272 + min: 819200 + max: 2102736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp4lvxy15 + job_id: jp2k7rdqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:44:12Z' + timestamp: '2024-11-09T22:30:34Z' - torchscript_onnx_tflite: - inference_time: 3241.0 - throughput: 308.54674483184203 + inference_time: 3239.0 + throughput: 308.73726458783574 estimated_peak_memory_range: - min: 20480 - max: 2013600 + min: 24576 + max: 1857328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j5we3on35 + job_id: jp14dn17p job_status: Passed torchscript_onnx_qnn: - inference_time: 3048.0 - throughput: 328.0839895013123 + inference_time: 3040.0 + throughput: 328.94736842105266 estimated_peak_memory_range: - min: 819200 - max: 2173904 + min: 77824 + max: 1321144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgdxqwlrp + job_id: jp0z1m9n5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:27:07Z' + timestamp: '2024-11-09T22:30:36Z' - torchscript_onnx_tflite: - inference_time: 3224.0 - throughput: 310.17369727047145 + inference_time: 3227.0 + throughput: 309.88534242330337 
estimated_peak_memory_range: - min: 24576 - max: 2094176 + min: 28672 + max: 2469784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -326,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jg9lyvewg + job_id: jgdxrd4zp job_status: Passed torchscript_onnx_qnn: - inference_time: 3038.0 - throughput: 329.1639236339697 + inference_time: 3051.0 + throughput: 327.76138970829237 estimated_peak_memory_range: - min: 835584 - max: 2185760 + min: 815104 + max: 2593608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j57ylz3v5 + job_id: jp8q3erop job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:27:08Z' + timestamp: '2024-11-09T22:30:37Z' - torchscript_onnx_tflite: - inference_time: 3226.0 - throughput: 309.98140111593307 + inference_time: 3236.0 + throughput: 309.02348578491967 estimated_peak_memory_range: - min: 49152 - max: 1861584 + min: 16384 + max: 1857632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp14w0x8p + job_id: j57yjen95 job_status: Passed torchscript_onnx_qnn: - inference_time: 3049.0 - throughput: 327.97638570022957 + inference_time: 3051.0 + throughput: 327.76138970829237 estimated_peak_memory_range: - min: 806912 - max: 2541240 + min: 819200 + max: 2260448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp4ldq085 + job_id: jgkel20ng job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:27:09Z' + timestamp: '2024-11-09T22:30:39Z' - torchscript_onnx_tflite: - 
inference_time: 4757.0 - throughput: 210.21652301870927 + inference_time: 5626.0 + throughput: 177.74617845716318 estimated_peak_memory_range: - min: 16384 - max: 97357648 + min: 311296 + max: 41664880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,14 +366,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp8qmyqkp + job_id: jp4lxy415 job_status: Passed torchscript_onnx_qnn: - inference_time: 4965.0 - throughput: 201.4098690835851 + inference_time: 5459.0 + throughput: 183.18373328448433 estimated_peak_memory_range: - min: 802816 - max: 27308448 + min: 851968 + max: 6774592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 197 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 197 + job_id: j5q67l1op + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:30:40Z' + - torchscript_onnx_tflite: + inference_time: 4734.0 + throughput: 211.23785382340515 + estimated_peak_memory_range: + min: 278528 + max: 96686288 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jpxk7lrl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4919.0 + throughput: 203.29335230737954 + estimated_peak_memory_range: + min: 806912 + max: 26614992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp2kl7rqp + job_id: jglv0yqm5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:44:17Z' + timestamp: '2024-11-09T22:30:41Z' - torchscript_onnx_qnn: - inference_time: 3224.0 - throughput: 310.17369727047145 + inference_time: 3207.0 + throughput: 311.81789834736514 
estimated_peak_memory_range: min: 786432 max: 786432 @@ -440,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jpxky7ll5 + job_id: jpy14o2lp job_status: Passed torchscript_onnx: - inference_time: 3329.0 - throughput: 300.39050765995796 + inference_time: 3354.0 + throughput: 298.1514609421586 estimated_peak_memory_range: - min: 38957056 - max: 38957056 + min: 37707776 + max: 37707776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: j57ylr2v5 + job_id: jpv61lnr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:44:13Z' + timestamp: '2024-11-09T22:30:45Z' diff --git a/qai_hub_models/models/midas_quantized/README.md b/qai_hub_models/models/midas_quantized/README.md index c61ad7c9..d621dd14 100644 --- a/qai_hub_models/models/midas_quantized/README.md +++ b/qai_hub_models/models/midas_quantized/README.md @@ -5,8 +5,7 @@ Midas is designed for estimating depth at each point in an image. -This is based on the implementation of Midas-V2-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/midas_quantized). 
diff --git a/qai_hub_models/models/midas_quantized/perf.yaml b/qai_hub_models/models/midas_quantized/perf.yaml index ff004bc2..0d4b7e02 100644 --- a/qai_hub_models/models/midas_quantized/perf.yaml +++ b/qai_hub_models/models/midas_quantized/perf.yaml @@ -46,11 +46,11 @@ models: - name: Midas-V2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1097.0 - throughput: 911.5770282588878 + inference_time: 1093.0 + throughput: 914.9130832570905 estimated_peak_memory_range: - min: 16384 - max: 1927120 + min: 12288 + max: 1270880 primary_compute_unit: NPU precision: int8 layer_info: @@ -58,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgn639nq5 + job_id: jgjv0r3vg job_status: Passed torchscript_onnx_qnn: - inference_time: 1439.0 - throughput: 694.9270326615705 + inference_time: 1436.0 + throughput: 696.3788300835655 estimated_peak_memory_range: - min: 155648 - max: 65595256 + min: 217088 + max: 7631920 primary_compute_unit: NPU precision: int8 layer_info: @@ -73,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jprve407g + job_id: jgn69z1k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -82,36 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:43:39Z' - - torchscript_onnx_qnn: - inference_time: 1332.0 - throughput: 750.7507507507507 - estimated_peak_memory_range: - min: 24576 - max: 1456352 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 203 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 203 - job_id: jp2kl7wqp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:43:40Z' + timestamp: '2024-11-09T22:29:47Z' - torchscript_onnx_tflite: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 
773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: min: 12288 - max: 93581632 + max: 93706784 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j57ylrrl5 + job_id: jpedr76o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 924.0 - throughput: 1082.2510822510822 + inference_time: 1013.0 + throughput: 987.1668311944719 estimated_peak_memory_range: - min: 208896 - max: 8921568 + min: 12288 + max: 26043856 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,7 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jpy164xlp + job_id: jprv4lx0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -143,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:43:41Z' + timestamp: '2024-11-09T22:29:48Z' - torchscript_onnx_tflite: - inference_time: 730.0 - throughput: 1369.86301369863 + inference_time: 721.0 + throughput: 1386.9625520110958 estimated_peak_memory_range: min: 8192 - max: 49212624 + max: 49025008 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp0z20065 + job_id: jgz3xlzo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 984.0 + throughput: 1016.260162601626 estimated_peak_memory_range: - min: 208896 - max: 22006576 + min: 0 + max: 21581328 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jp0zl1jn5 + job_id: jp2k7rorp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -181,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:43:42Z' + timestamp: '2024-11-09T22:29:49Z' - 
torchscript_onnx_tflite: - inference_time: 3932.0 - throughput: 254.323499491353 + inference_time: 3670.0 + throughput: 272.47956403269757 estimated_peak_memory_range: - min: 77824 - max: 52709616 + min: 0 + max: 50628384 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp4ldrrv5 + job_id: j5wedly35 job_status: Passed torchscript_onnx_qnn: - inference_time: 5780.0 - throughput: 173.01038062283737 + inference_time: 6075.0 + throughput: 164.6090534979424 estimated_peak_memory_range: - min: 237568 - max: 8503920 + min: 258048 + max: 8209632 primary_compute_unit: NPU precision: int8 layer_info: @@ -210,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jp8qz3xop + job_id: jpy14o88p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -219,13 +196,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:43:43Z' + timestamp: '2024-11-09T22:29:51Z' - torchscript_onnx_tflite: - inference_time: 15450.0 - throughput: 64.72491909385113 + inference_time: 15347.0 + throughput: 65.15931452401121 estimated_peak_memory_range: - min: 94208 - max: 6512208 + min: 106496 + max: 3253256 primary_compute_unit: NPU precision: int8 layer_info: @@ -233,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpxk6oo15 + job_id: jg9l3zowg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -242,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:44:26Z' + timestamp: '2024-11-09T22:29:40Z' - torchscript_onnx_tflite: - inference_time: 1073.0 - throughput: 931.9664492078285 + inference_time: 1083.0 + throughput: 923.3610341643582 estimated_peak_memory_range: - min: 20480 - max: 1526824 + min: 12288 + max: 1466728 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,14 +233,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 145 - job_id: j5mn6xxwp + job_id: jp14dno8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1314.0 - throughput: 761.03500761035 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 229376 - max: 1542472 + min: 249856 + max: 1488616 primary_compute_unit: NPU precision: int8 layer_info: @@ -271,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgke3l4ng + job_id: jp0z1mo95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -280,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:43:44Z' + timestamp: '2024-11-09T22:29:52Z' - torchscript_onnx_tflite: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1089.0 + throughput: 918.2736455463728 estimated_peak_memory_range: min: 12288 - max: 10595768 + max: 12798944 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,14 +271,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp8qmo9kp + job_id: jgdxrd6rp job_status: Passed torchscript_onnx_qnn: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1318.0 + throughput: 758.7253414264036 estimated_peak_memory_range: - min: 229376 - max: 1571392 + min: 225280 + max: 1347872 primary_compute_unit: NPU precision: int8 layer_info: @@ -309,7 +286,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j56yz6j6p + job_id: jgkel26wg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -318,13 +295,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:26:44Z' + timestamp: '2024-11-09T22:29:54Z' - torchscript_onnx_tflite: - inference_time: 1084.0 - throughput: 922.509225092251 + inference_time: 1101.0 + throughput: 908.2652134423251 estimated_peak_memory_range: min: 12288 - max: 3260136 + max: 1430152 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -332,14 +309,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgkeqznwg + job_id: j57yjeov5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1316.0 - throughput: 759.8784194528876 + inference_time: 1312.0 + throughput: 762.1951219512196 estimated_peak_memory_range: - min: 16384 - max: 1589024 + min: 262144 + max: 1681688 primary_compute_unit: NPU precision: int8 layer_info: @@ -347,7 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jp3j1k33g + job_id: j5q67l4np job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -356,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:26:45Z' + timestamp: '2024-11-09T22:29:55Z' - torchscript_onnx_tflite: - inference_time: 1095.0 - throughput: 913.2420091324201 + inference_time: 1078.0 + throughput: 927.643784786642 estimated_peak_memory_range: min: 12288 - max: 2186176 + max: 16552528 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jglv2nzj5 + job_id: jp4lxye85 job_status: Passed torchscript_onnx_qnn: - inference_time: 1327.0 - throughput: 753.5795026375282 + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: - min: 221184 - max: 1504160 + min: 225280 + max: 1883936 primary_compute_unit: NPU precision: int8 layer_info: @@ -385,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jpv6r3xk5 + job_id: jglv0ywj5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -394,13 +371,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:26:47Z' + timestamp: '2024-11-09T22:29:56Z' - torchscript_onnx_tflite: - inference_time: 1945.0 - throughput: 514.1388174807198 + inference_time: 1928.0 + throughput: 518.6721991701245 estimated_peak_memory_range: - min: 24576 - 
max: 48416320 + min: 12288 + max: 48216544 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,14 +385,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5q6r8knp + job_id: jpxk7l035 job_status: Passed torchscript_onnx_qnn: - inference_time: 2319.0 - throughput: 431.22035360069 + inference_time: 2526.0 + throughput: 395.88281868566906 estimated_peak_memory_range: - min: 212992 - max: 6109856 + min: 0 + max: 5855424 primary_compute_unit: NPU precision: int8 layer_info: @@ -423,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgo2ny0qp + job_id: j56y38o6p job_status: Passed reference_device_info: name: SA8295P ADP @@ -432,13 +409,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:26:46Z' + timestamp: '2024-11-09T22:29:57Z' - torchscript_onnx_tflite: - inference_time: 1406.0 - throughput: 711.2375533428165 + inference_time: 1417.0 + throughput: 705.7163020465773 estimated_peak_memory_range: - min: 81920 - max: 94319520 + min: 86016 + max: 91458784 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,14 +423,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpy1j337p + job_id: j5mnw09dp job_status: Passed torchscript_onnx_qnn: - inference_time: 1775.0 - throughput: 563.3802816901408 + inference_time: 1776.0 + throughput: 563.063063063063 estimated_peak_memory_range: min: 208896 - max: 29040128 + max: 26710736 primary_compute_unit: NPU precision: int8 layer_info: @@ -461,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jpv6v1lr5 + job_id: jp3j4zo3g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -470,10 +447,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:43:50Z' + timestamp: '2024-11-09T22:29:59Z' - torchscript_onnx_qnn: - inference_time: 1533.0 - throughput: 652.3157208088714 + inference_time: 1479.0 + 
throughput: 676.132521974307 estimated_peak_memory_range: min: 425984 max: 425984 @@ -484,7 +461,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j5q637yop + job_id: jp8q3ejkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -493,4 +470,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:43:45Z' + timestamp: '2024-11-09T22:29:53Z' diff --git a/qai_hub_models/models/mistral_3b_quantized/README.md b/qai_hub_models/models/mistral_3b_quantized/README.md index a8500d9f..0da98d67 100644 --- a/qai_hub_models/models/mistral_3b_quantized/README.md +++ b/qai_hub_models/models/mistral_3b_quantized/README.md @@ -5,8 +5,7 @@ Mistral 3B model is Mistral AI's first generation edge model, optimized for optimal performance on Snapdragon platforms. -This is based on the implementation of Mistral-3B found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mistral_3b_quantized). diff --git a/qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md b/qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md index c93301ee..8c0d000b 100644 --- a/qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md +++ b/qai_hub_models/models/mistral_7b_instruct_v0_3_quantized/README.md @@ -5,8 +5,7 @@ Mistral AI's first open source dense model released September 2023. Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine‑tuned version of the Mistral‑7B‑v0.3. It has an extended vocabulary and supports the v3 Tokenizer, enhancing language understanding and generation. Additionally function calling is enabled. 
-This is based on the implementation of Mistral-7B-Instruct-v0.3 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mistral_7b_instruct_v0_3_quantized). diff --git a/qai_hub_models/models/mnasnet05/README.md b/qai_hub_models/models/mnasnet05/README.md index adc79e3c..b3a93a9e 100644 --- a/qai_hub_models/models/mnasnet05/README.md +++ b/qai_hub_models/models/mnasnet05/README.md @@ -5,8 +5,7 @@ MNASNet05 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MNASNet05 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mnasnet05). 
diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index 73b4e08e..245a8e42 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 755.0 - throughput: 1324.5033112582782 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: - min: 16384 - max: 2614664 + min: 24576 + max: 1384760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j56yn37yp + job_id: jglv0yxj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 817.0 + throughput: 1223.9902080783354 estimated_peak_memory_range: - min: 12288 - max: 33256208 + min: 548864 + max: 22963176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp3je49ng + job_id: jg9l3zzwg job_status: Passed torchscript_onnx: - inference_time: 748.0 - throughput: 1336.8983957219252 + inference_time: 746.0 + throughput: 1340.4825737265417 estimated_peak_memory_range: - min: 618496 - max: 151654856 + min: 614400 + max: 151761056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: j57yxj495 + job_id: jpy14oo8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,36 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:42:52Z' - - 
torchscript_onnx_qnn: - inference_time: 758.0 - throughput: 1319.2612137203166 + timestamp: '2024-11-09T22:28:43Z' + - torchscript_onnx_tflite: + inference_time: 516.0 + throughput: 1937.984496124031 estimated_peak_memory_range: - min: 32768 - max: 1665272 + min: 12288 + max: 51863152 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 103 + layers_on_npu: 71 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 103 - job_id: jgo231rkp + total_layers: 71 + job_id: j56y3876p job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:42:43Z' - - torchscript_onnx_qnn: - inference_time: 518.0 - throughput: 1930.5019305019305 + torchscript_onnx_qnn: + inference_time: 560.0 + throughput: 1785.7142857142858 estimated_peak_memory_range: min: 0 - max: 8611072 + max: 16956656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpv6v1dr5 + job_id: jp14dnn8p job_status: Passed torchscript_onnx: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 541.0 + throughput: 1848.4288354898335 estimated_peak_memory_range: min: 0 - max: 56234368 + max: 55422672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgo2kqqxp + job_id: jp0z1mm95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:42:44Z' - - torchscript_onnx_qnn: - inference_time: 577.0 - throughput: 1733.102253032929 + timestamp: '2024-11-09T22:28:44Z' + - torchscript_onnx_tflite: + inference_time: 506.0 + throughput: 1976.2845849802372 + estimated_peak_memory_range: + min: 8192 + max: 22631600 + primary_compute_unit: NPU 
+ precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: jp3j4z93g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 552.0 + throughput: 1811.5942028985507 estimated_peak_memory_range: min: 0 - max: 13289504 + max: 12776256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgjve07eg + job_id: jgdxrddrp job_status: Passed torchscript_onnx: - inference_time: 573.0 - throughput: 1745.2006980802792 + inference_time: 467.0 + throughput: 2141.3276231263385 estimated_peak_memory_range: min: 0 - max: 24207920 + max: 23689456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgjvz44xg + job_id: jp8q3eekp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:42:45Z' - - torchscript_onnx_qnn: + timestamp: '2024-11-09T22:28:45Z' + - torchscript_onnx_tflite: inference_time: 755.0 throughput: 1324.5033112582782 + estimated_peak_memory_range: + min: 12288 + max: 1304552 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: jgo21lrqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 760.0 + throughput: 1315.7894736842106 estimated_peak_memory_range: min: 638976 - max: 2135440 + max: 1763800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpedkrzv5 + job_id: j57yjeev5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:42:46Z' + timestamp: 
'2024-11-09T22:28:35Z' - torchscript_onnx_tflite: - inference_time: 759.0 - throughput: 1317.5230566534915 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 16384 - max: 1492360 + min: 12288 + max: 6485488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j57ylzwv5 + job_id: jpv61llk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 765.0 - throughput: 1307.18954248366 + inference_time: 758.0 + throughput: 1319.2612137203166 estimated_peak_memory_range: - min: 638976 - max: 1805592 + min: 634880 + max: 2327912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5mn6r2dp + job_id: jpxk7ll35 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -252,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:25:55Z' + timestamp: '2024-11-09T22:28:38Z' - torchscript_onnx_tflite: - inference_time: 757.0 - throughput: 1321.003963011889 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: min: 24576 - max: 21471536 + max: 1994472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp4ldqo85 + job_id: jgjv0rrvg job_status: Passed torchscript_onnx_qnn: - inference_time: 768.0 - throughput: 1302.0833333333333 + inference_time: 759.0 + throughput: 1317.5230566534915 estimated_peak_memory_range: - min: 647168 - max: 2302384 + min: 634880 + max: 2313048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -281,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgn6m2yk5 + job_id: j5mnw00dp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -290,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm 
chipset: SA8775P Proxy - timestamp: '2024-10-30T01:25:56Z' + timestamp: '2024-11-09T22:28:39Z' - torchscript_onnx_tflite: inference_time: 757.0 throughput: 1321.003963011889 estimated_peak_memory_range: - min: 32768 - max: 1540096 + min: 24576 + max: 160550136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -304,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpxk6vj35 + job_id: jpedr77o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 760.0 - throughput: 1315.7894736842106 + inference_time: 764.0 + throughput: 1308.9005235602094 estimated_peak_memory_range: - min: 647168 - max: 1939376 + min: 667648 + max: 1789384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -319,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jprv2kq0g + job_id: jgn69zzk5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -328,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:25:57Z' - - torchscript_onnx_qnn: - inference_time: 1100.0 - throughput: 909.0909090909091 + timestamp: '2024-11-09T22:28:40Z' + - torchscript_onnx_tflite: + inference_time: 1472.0 + throughput: 679.3478260869565 + estimated_peak_memory_range: + min: 12288 + max: 23342304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: jgz3xllo5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1622.0 + throughput: 616.5228113440197 + estimated_peak_memory_range: + min: 618496 + max: 6515248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 103 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 103 + job_id: jprv4ll0g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:28:41Z' + - 
torchscript_onnx_tflite: + inference_time: 1024.0 + throughput: 976.5625 + estimated_peak_memory_range: + min: 16384 + max: 53630672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 71 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 71 + job_id: j5wedll35 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1107.0 + throughput: 903.342366757001 estimated_peak_memory_range: min: 618496 - max: 16612416 + max: 18920208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -342,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgdxor3zp + job_id: jp2k7rrrp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -351,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:42:51Z' + timestamp: '2024-11-09T22:28:42Z' - torchscript_onnx_qnn: - inference_time: 891.0 - throughput: 1122.334455667789 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: min: 602112 max: 602112 @@ -365,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgz3rxmx5 + job_id: jp4lxyy85 job_status: Passed torchscript_onnx: - inference_time: 825.0 - throughput: 1212.121212121212 + inference_time: 841.0 + throughput: 1189.0606420927468 estimated_peak_memory_range: - min: 5787648 - max: 5787648 + min: 6942720 + max: 6942720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -380,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jpv60xxj5 + job_id: jgkel22wg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -389,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:42:47Z' + timestamp: '2024-11-09T22:28:46Z' diff --git a/qai_hub_models/models/mobilenet_v2/README.md b/qai_hub_models/models/mobilenet_v2/README.md index 62926fb4..97d67f50 100644 --- 
a/qai_hub_models/models/mobilenet_v2/README.md +++ b/qai_hub_models/models/mobilenet_v2/README.md @@ -5,8 +5,7 @@ MobileNetV2 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MobileNet-v2 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v2). diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index 80e83608..6b96e733 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 899.0 - throughput: 1112.3470522803113 + inference_time: 902.0 + throughput: 1108.6474501108648 estimated_peak_memory_range: - min: 20480 - max: 2281536 + min: 12288 + max: 3485536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jp14ed67p + job_id: jp0z1m695 job_status: Passed torchscript_onnx_qnn: - inference_time: 1249.0 - throughput: 800.640512409928 + inference_time: 1254.0 + throughput: 797.4481658692185 estimated_peak_memory_range: - min: 16384 - max: 39290192 + min: 622592 + max: 50320488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 105 - job_id: jgdxor2zp + job_id: jgjv0r7vg job_status: Passed torchscript_onnx: - inference_time: 929.0 - throughput: 1076.4262648008612 + inference_time: 905.0 + throughput: 1104.9723756906078 estimated_peak_memory_range: - min: 544768 - max: 2114328 + min: 618496 + max: 2876496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp8qz31op + job_id: j5mnw0mdp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:42:26Z' - - torchscript_onnx_qnn: - inference_time: 1196.0 - throughput: 836.1204013377926 + timestamp: '2024-11-09T22:28:02Z' + - torchscript_onnx_tflite: + inference_time: 612.0 + throughput: 1633.986928104575 estimated_peak_memory_range: - min: 24576 - max: 1347904 + min: 16384 + max: 64324208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jp8q3e1kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 842.0 + throughput: 1187.648456057007 + estimated_peak_memory_range: + min: 0 + max: 17371808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,22 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j57yxj995 + job_id: jpedr7zo5 job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:42:18Z' - - torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + torchscript_onnx: + inference_time: 650.0 + throughput: 1538.4615384615386 estimated_peak_memory_range: min: 0 - max: 8391408 + max: 67757984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +138,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp4lvx315 + job_id: jgn69znk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:42:19Z' - - torchscript_onnx_qnn: - inference_time: 692.0 - throughput: 1445.086705202312 + timestamp: '2024-11-09T22:28:03Z' + - torchscript_onnx_tflite: + inference_time: 613.0 + throughput: 1631.3213703099511 + estimated_peak_memory_range: + min: 8192 + max: 24972864 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jgkel28wg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 849.0 + throughput: 1177.8563015312131 estimated_peak_memory_range: - min: 614400 - max: 14934608 + min: 0 + max: 14492528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,7 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jpxky7xl5 + job_id: jgz3xlmo5 + job_status: Passed + torchscript_onnx: + inference_time: 687.0 + throughput: 1455.604075691412 + estimated_peak_memory_range: + min: 0 + max: 25542624 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jprv4l00g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -161,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:42:19Z' - - torchscript_onnx_qnn: - inference_time: 1186.0 - throughput: 843.1703204047218 + timestamp: '2024-11-09T22:28:04Z' + - torchscript_onnx_tflite: + inference_time: 901.0 + throughput: 1109.8779134295228 estimated_peak_memory_range: - min: 630784 - max: 1796200 + min: 20480 + max: 1983304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 
0 + total_layers: 72 + job_id: j5q67lvnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1184.0 + throughput: 844.5945945945946 + estimated_peak_memory_range: + min: 643072 + max: 2196752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j5mn3w89p + job_id: j5wedl735 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:42:20Z' + timestamp: '2024-11-09T22:27:53Z' - torchscript_onnx_tflite: - inference_time: 903.0 - throughput: 1107.4197120708748 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: - min: 24576 - max: 1507480 + min: 32768 + max: 185483864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jpedw9915 + job_id: jglv0ylj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1193.0 - throughput: 838.2229673093043 + inference_time: 1186.0 + throughput: 843.1703204047218 estimated_peak_memory_range: - min: 634880 - max: 1904592 + min: 2154496 + max: 3817136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jg9lyv1lg + job_id: jp14dnj8p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:25:36Z' + timestamp: '2024-11-09T22:27:56Z' - torchscript_onnx_tflite: - inference_time: 904.0 - throughput: 1106.1946902654868 + inference_time: 898.0 + throughput: 1113.5857461024498 estimated_peak_memory_range: - min: 36864 - max: 174576560 + min: 16384 + max: 2462008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 72 - job_id: jgz3jeek5 + job_id: j56y38w6p job_status: Passed torchscript_onnx_qnn: - inference_time: 1196.0 - throughput: 836.1204013377926 + inference_time: 1192.0 + throughput: 838.9261744966443 estimated_peak_memory_range: - min: 626688 - max: 2140344 + min: 2715648 + max: 4018256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp14w0l2p + job_id: jgdxrd3rp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:25:37Z' + timestamp: '2024-11-09T22:27:57Z' - torchscript_onnx_tflite: - inference_time: 905.0 - throughput: 1104.9723756906078 + inference_time: 909.0 + throughput: 1100.1100110011 estimated_peak_memory_range: - min: 20480 - max: 1161976 + min: 24576 + max: 2108200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j5we3ov65 + job_id: jp3j4z63g job_status: Passed torchscript_onnx_qnn: - inference_time: 1190.0 - throughput: 840.3361344537815 + inference_time: 1194.0 + throughput: 837.5209380234506 estimated_peak_memory_range: - min: 28672 - max: 1392216 + min: 634880 + max: 1978944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgdxqw9ep + job_id: j57yje4v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -298,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:25:38Z' - - torchscript_onnx_qnn: - inference_time: 1437.0 - throughput: 695.8942240779402 + timestamp: '2024-11-09T22:27:58Z' + - torchscript_onnx_tflite: + inference_time: 1513.0 + throughput: 660.9385327164574 + estimated_peak_memory_range: + min: 12288 + max: 24624096 + primary_compute_unit: NPU 
+ precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jgo21l8qp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2093.0 + throughput: 477.78308647873865 + estimated_peak_memory_range: + min: 0 + max: 5659344 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jp4lxy185 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:27:59Z' + - torchscript_onnx_tflite: + inference_time: 1081.0 + throughput: 925.0693802035153 + estimated_peak_memory_range: + min: 16384 + max: 65265776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 72 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 72 + job_id: jpv61ldk5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1442.0 + throughput: 693.4812760055479 estimated_peak_memory_range: min: 618496 - max: 19272496 + max: 19855216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp0zl16n5 + job_id: jpxk7l435 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:42:25Z' + timestamp: '2024-11-09T22:28:00Z' - torchscript_onnx_qnn: - inference_time: 1383.0 - throughput: 723.0657989877079 + inference_time: 1372.0 + throughput: 728.862973760933 estimated_peak_memory_range: min: 602112 max: 602112 @@ -335,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgn639kq5 + job_id: jg9l3zmwg + job_status: Passed + torchscript_onnx: + inference_time: 977.0 + throughput: 1023.5414534288639 + estimated_peak_memory_range: + min: 9134080 + max: 9134080 + 
primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 105 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 105 + job_id: jp2k7rwrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:42:21Z' + timestamp: '2024-11-09T22:28:05Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/README.md b/qai_hub_models/models/mobilenet_v2_quantized/README.md index 378f950e..ff33fbf6 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v2_quantized/README.md @@ -5,8 +5,7 @@ MobileNetV2 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MobileNet-v2-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v2_quantized). 
diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 1374df7b..f718cf83 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -48,50 +48,12 @@ aggregated: models: - name: MobileNet-v2-Quantized performance_metrics: - - torchscript_onnx_tflite: - inference_time: 428.0 - throughput: 2336.448598130841 - estimated_peak_memory_range: - min: 12288 - max: 1395736 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 109 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 109 - job_id: jp3j1vnlg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 665.0 - throughput: 1503.7593984962407 - estimated_peak_memory_range: - min: 24576 - max: 16293136 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 106 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 106 - job_id: jpxk6nn15 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:50:13Z' - torchscript_onnx_tflite: inference_time: 435.0 throughput: 2298.8505747126437 estimated_peak_memory_range: min: 12288 - max: 2303400 + max: 1383016 primary_compute_unit: NPU precision: int8 layer_info: @@ -99,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jgo2nkzxp + job_id: jp0z18005 job_status: Passed torchscript_onnx_qnn: - inference_time: 665.0 - throughput: 1503.7593984962407 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: - min: 12288 - max: 5695456 + min: 28672 + max: 5829088 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,7 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j5mn6qqwp + job_id: jgz3xqd45 job_status: Passed reference_device_info: name: 
Samsung Galaxy S23 @@ -123,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:50:14Z' + timestamp: '2024-11-09T23:45:47Z' - torchscript_onnx_tflite: - inference_time: 302.0 - throughput: 3311.2582781456954 + inference_time: 307.0 + throughput: 3257.328990228013 estimated_peak_memory_range: min: 12288 - max: 45096752 + max: 44674832 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jpv6r0qj5 + job_id: jp8q3dyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 484.0 + throughput: 2066.115702479339 estimated_peak_memory_range: - min: 159744 - max: 18521488 + min: 0 + max: 16517904 primary_compute_unit: NPU precision: int8 layer_info: @@ -152,7 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgn6mllr5 + job_id: j5wed0645 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -161,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:50:16Z' + timestamp: '2024-11-09T23:45:49Z' - torchscript_onnx_tflite: - inference_time: 243.0 - throughput: 4115.22633744856 + inference_time: 240.0 + throughput: 4166.666666666667 estimated_peak_memory_range: min: 8192 - max: 28295824 + max: 28385984 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: j57yl00l5 + job_id: jgkelwxvg job_status: Passed torchscript_onnx_qnn: - inference_time: 405.0 - throughput: 2469.135802469136 + inference_time: 513.0 + throughput: 1949.317738791423 estimated_peak_memory_range: - min: 159744 - max: 14139472 + min: 8192 + max: 13976176 primary_compute_unit: NPU precision: int8 layer_info: @@ -190,7 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: 
jglv26685 + job_id: jg9l37nmg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -199,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:50:19Z' + timestamp: '2024-11-09T23:45:50Z' - torchscript_onnx_tflite: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1071.0 + throughput: 933.7068160597572 estimated_peak_memory_range: - min: 12288 - max: 28750752 + min: 20480 + max: 28790704 primary_compute_unit: NPU precision: int8 layer_info: @@ -213,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jgjv2zdxg + job_id: j5q67xqep job_status: Passed torchscript_onnx_qnn: - inference_time: 1467.0 - throughput: 681.6632583503749 + inference_time: 1485.0 + throughput: 673.4006734006734 estimated_peak_memory_range: - min: 16384 - max: 8638288 + min: 12288 + max: 8316256 primary_compute_unit: NPU precision: int8 layer_info: @@ -228,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jprv2889g + job_id: jp14dkznp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -237,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:49:56Z' + timestamp: '2024-11-09T23:45:33Z' - torchscript_onnx_tflite: - inference_time: 12242.0 - throughput: 81.685999019768 + inference_time: 12717.0 + throughput: 78.63489816780687 estimated_peak_memory_range: - min: 16384 - max: 6370544 + min: 12288 + max: 12966464 primary_compute_unit: NPU precision: int8 layer_info: @@ -251,7 +213,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 0 total_layers: 109 - job_id: jpedweo15 + job_id: jglv09m25 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -260,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:49:38Z' + timestamp: '2024-11-09T23:45:16Z' - torchscript_onnx_tflite: - inference_time: 432.0 - 
throughput: 2314.814814814815 + inference_time: 424.0 + throughput: 2358.490566037736 estimated_peak_memory_range: - min: 16384 - max: 1429568 + min: 12288 + max: 1230736 primary_compute_unit: NPU precision: int8 layer_info: @@ -274,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jgz3jo2k5 + job_id: j56y394np job_status: Passed torchscript_onnx_qnn: - inference_time: 614.0 - throughput: 1628.6644951140065 + inference_time: 627.0 + throughput: 1594.896331738437 estimated_peak_memory_range: - min: 172032 - max: 1856216 + min: 188416 + max: 1420288 primary_compute_unit: NPU precision: int8 layer_info: @@ -289,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp2k9004p + job_id: jgdxry16p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -298,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:49:58Z' + timestamp: '2024-11-09T23:45:35Z' - torchscript_onnx_tflite: - inference_time: 432.0 - throughput: 2314.814814814815 + inference_time: 430.0 + throughput: 2325.5813953488373 estimated_peak_memory_range: min: 12288 - max: 1527848 + max: 1412624 primary_compute_unit: NPU precision: int8 layer_info: @@ -312,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jp8qmvykp + job_id: jp3j4l0mg job_status: Passed torchscript_onnx_qnn: - inference_time: 614.0 - throughput: 1628.6644951140065 + inference_time: 619.0 + throughput: 1615.5088852988692 estimated_peak_memory_range: - min: 184320 - max: 1661904 + min: 180224 + max: 1840656 primary_compute_unit: NPU precision: int8 layer_info: @@ -327,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j56yzl46p + job_id: jp4lx6r25 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -336,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:07:03Z' + timestamp: 
'2024-11-09T23:45:38Z' - torchscript_onnx_tflite: - inference_time: 429.0 - throughput: 2331.002331002331 + inference_time: 440.0 + throughput: 2272.7272727272725 estimated_peak_memory_range: - min: 36864 - max: 2131304 + min: 12288 + max: 1388032 primary_compute_unit: NPU precision: int8 layer_info: @@ -350,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jgkeqmxwg + job_id: jgo21761p job_status: Passed torchscript_onnx_qnn: - inference_time: 620.0 - throughput: 1612.9032258064517 + inference_time: 618.0 + throughput: 1618.1229773462783 estimated_peak_memory_range: min: 180224 - max: 1297936 + max: 1528744 primary_compute_unit: NPU precision: int8 layer_info: @@ -365,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp3j1203g + job_id: jpxk78o85 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -374,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:07:05Z' + timestamp: '2024-11-09T23:45:40Z' - torchscript_onnx_tflite: - inference_time: 435.0 - throughput: 2298.8505747126437 + inference_time: 431.0 + throughput: 2320.185614849188 estimated_peak_memory_range: - min: 16384 - max: 1427616 + min: 32768 + max: 103396912 primary_compute_unit: NPU precision: int8 layer_info: @@ -388,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jglv2rmj5 + job_id: jpv61ykz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 616.0 - throughput: 1623.3766233766235 + inference_time: 618.0 + throughput: 1618.1229773462783 estimated_peak_memory_range: min: 184320 - max: 1546616 + max: 1563136 primary_compute_unit: NPU precision: int8 layer_info: @@ -403,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpv6rx6k5 + job_id: j5mnw1x7p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -412,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
SA8650P Proxy - timestamp: '2024-10-30T02:07:09Z' + timestamp: '2024-11-09T23:45:42Z' - torchscript_onnx_tflite: - inference_time: 811.0 - throughput: 1233.0456226880394 + inference_time: 813.0 + throughput: 1230.0123001230013 estimated_peak_memory_range: min: 12288 - max: 27461504 + max: 27778368 primary_compute_unit: NPU precision: int8 layer_info: @@ -426,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: j5q6roqnp + job_id: jgjv06n1g job_status: Passed torchscript_onnx_qnn: - inference_time: 1134.0 - throughput: 881.8342151675485 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: min: 0 - max: 5946816 + max: 5726720 primary_compute_unit: NPU precision: int8 layer_info: @@ -441,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgo2nq6qp + job_id: jgn69dvj5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -450,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:07:07Z' + timestamp: '2024-11-09T23:45:43Z' - torchscript_onnx_tflite: - inference_time: 489.0 - throughput: 2044.9897750511248 + inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: min: 12288 - max: 46187504 + max: 45150848 primary_compute_unit: NPU precision: int8 layer_info: @@ -464,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 109 - job_id: jgdxqeeep + job_id: jpedr0m85 job_status: Passed torchscript_onnx_qnn: - inference_time: 728.0 - throughput: 1373.6263736263736 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: - min: 159744 - max: 19648432 + min: 172032 + max: 19792432 primary_compute_unit: NPU precision: int8 layer_info: @@ -479,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j5q6ree4p + job_id: jprv4m3kg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -488,13 +450,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:50:07Z' + timestamp: '2024-11-09T23:45:45Z' - torchscript_onnx_qnn: - inference_time: 750.0 - throughput: 1333.3333333333333 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: - min: 626688 - max: 626688 + min: 503808 + max: 503808 primary_compute_unit: NPU precision: int8 layer_info: @@ -502,7 +464,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpy1jrr7p + job_id: j57yj1rn5 + job_status: Passed + torchscript_onnx: + inference_time: 126324.0 + throughput: 7.916152116779076 + estimated_peak_memory_range: + min: 62406656 + max: 62406656 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 370 + layers_on_gpu: 0 + layers_on_cpu: 87 + total_layers: 457 + job_id: jp8q3dqqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -511,4 +488,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:50:18Z' + timestamp: '2024-11-09T23:45:52Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/README.md b/qai_hub_models/models/mobilenet_v3_large/README.md index f2e5c92e..aa9a6f0b 100644 --- a/qai_hub_models/models/mobilenet_v3_large/README.md +++ b/qai_hub_models/models/mobilenet_v3_large/README.md @@ -5,8 +5,7 @@ MobileNet-v3-Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MobileNet-v3-Large found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_large). 
diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index ea45ec94..47955beb 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 986.0 - throughput: 1014.1987829614604 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: - min: 24576 - max: 4826640 + min: 16384 + max: 1532952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jpy1647lp + job_id: jpy14o77p job_status: Passed torchscript_onnx_qnn: - inference_time: 1044.0 - throughput: 957.8544061302682 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: min: 16384 - max: 57235816 + max: 67397512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jp0zl1vn5 + job_id: jgjv0rwxg job_status: Passed torchscript_onnx: - inference_time: 996.0 - throughput: 1004.0160642570281 + inference_time: 1039.0 + throughput: 962.4639076034649 estimated_peak_memory_range: - min: 12288 - max: 15709688 + min: 32768 + max: 74447000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpedkryv5 + job_id: jgdxrd2rp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-31T14:42:00Z' - - torchscript_onnx_qnn: - inference_time: 999.0 - throughput: 1001.001001001001 + timestamp: '2024-11-09T22:27:20Z' + - torchscript_onnx_tflite: + inference_time: 673.0 + throughput: 1485.8841010401188 estimated_peak_memory_range: - min: 634880 - max: 2069632 + min: 16384 + max: 68250448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jp0z1mv65 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 712.0 + throughput: 1404.4943820224719 + estimated_peak_memory_range: + min: 4190208 + max: 22455008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,30 +123,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jp8qz34op + job_id: jpedr7l15 job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:41:52Z' - - torchscript_onnx_qnn: - inference_time: 679.0 - throughput: 1472.7540500736377 + torchscript_onnx: + inference_time: 721.0 + throughput: 1386.9625520110958 estimated_peak_memory_range: min: 0 - max: 8579776 + max: 69225696 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 144 + layers_on_npu: 146 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 144 - job_id: jgke3l9ng + total_layers: 146 + job_id: j57yje9v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:41:53Z' - - torchscript_onnx_qnn: - inference_time: 712.0 - throughput: 1404.4943820224719 + timestamp: '2024-11-09T22:27:21Z' + - torchscript_onnx_tflite: + inference_time: 678.0 + throughput: 1474.9262536873157 estimated_peak_memory_range: - min: 0 - max: 14660624 + min: 12288 + max: 26115376 + primary_compute_unit: NPU + precision: fp16 + 
layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jp8q3e4xp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 592.0 + throughput: 1689.1891891891892 + estimated_peak_memory_range: + min: 614400 + max: 14802768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,7 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j5q637mop + job_id: jgz3xl4k5 + job_status: Passed + torchscript_onnx: + inference_time: 617.0 + throughput: 1620.7455429497568 + estimated_peak_memory_range: + min: 0 + max: 27595296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jp4lxy385 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -161,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:41:54Z' - - torchscript_onnx_qnn: - inference_time: 993.0 - throughput: 1007.0493454179255 + timestamp: '2024-11-09T22:27:22Z' + - torchscript_onnx_tflite: + inference_time: 992.0 + throughput: 1008.0645161290323 + estimated_peak_memory_range: + min: 45056 + max: 17669936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: j5q67lm4p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1022.0 + throughput: 978.4735812133073 estimated_peak_memory_range: min: 634880 - max: 1782712 + max: 1739960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jglv30lm5 + job_id: j5wedl465 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:41:55Z' + timestamp: '2024-11-09T22:27:12Z' - torchscript_onnx_tflite: - 
inference_time: 990.0 - throughput: 1010.10101010101 + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 20480 - max: 1429480 + min: 28672 + max: 1487128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp0z2yy65 + job_id: jglv0y185 job_status: Passed torchscript_onnx_qnn: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: - min: 2113536 - max: 3697824 + min: 16384 + max: 1510952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j5q6r884p + job_id: jp14dn62p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:25:17Z' + timestamp: '2024-11-09T22:27:14Z' - torchscript_onnx_tflite: inference_time: 990.0 throughput: 1010.10101010101 estimated_peak_memory_range: - min: 24576 - max: 35605552 + min: 16384 + max: 238964360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp8qmooxp + job_id: j56y38d0p job_status: Passed torchscript_onnx_qnn: - inference_time: 1000.0 - throughput: 1000.0 + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 643072 - max: 2043992 + min: 2449408 + max: 3677176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jglv2nn85 + job_id: jgdxrd2ep job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:25:18Z' + timestamp: '2024-11-09T22:27:15Z' - 
torchscript_onnx_tflite: - inference_time: 990.0 - throughput: 1010.10101010101 + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 20480 - max: 17610680 + min: 12288 + max: 1586880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgkeqzz2g + job_id: jp3j4zwlg job_status: Passed torchscript_onnx_qnn: - inference_time: 997.0 - throughput: 1003.0090270812437 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 634880 - max: 2003584 + min: 647168 + max: 2025944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j56yz660p + job_id: j5wedl435 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -298,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:25:19Z' - - torchscript_onnx_qnn: - inference_time: 1459.0 - throughput: 685.4009595613434 + timestamp: '2024-11-09T22:27:16Z' + - torchscript_onnx_tflite: + inference_time: 1877.0 + throughput: 532.7650506126798 + estimated_peak_memory_range: + min: 16384 + max: 27830912 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jgo21l4xp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2089.0 + throughput: 478.6979415988511 + estimated_peak_memory_range: + min: 0 + max: 5924160 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jg9l3zdwg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:27:17Z' + - torchscript_onnx_tflite: + inference_time: 1395.0 + 
throughput: 716.8458781362007 + estimated_peak_memory_range: + min: 20480 + max: 70608880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jpv61l9j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1462.0 + throughput: 683.9945280437756 estimated_peak_memory_range: min: 618496 - max: 21609568 + max: 22625920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgjve0qeg + job_id: jp14dn68p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:41:59Z' + timestamp: '2024-11-09T22:27:19Z' - torchscript_onnx_qnn: - inference_time: 1171.0 - throughput: 853.9709649871904 + inference_time: 1184.0 + throughput: 844.5945945945946 estimated_peak_memory_range: min: 602112 max: 602112 @@ -335,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j56yn3wyp + job_id: jg9l3zdlg + job_status: Passed + torchscript_onnx: + inference_time: 1070.0 + throughput: 934.5794392523364 + estimated_peak_memory_range: + min: 13680640 + max: 13680640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 146 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 146 + job_id: jpxk7lx35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:41:56Z' + timestamp: '2024-11-09T22:27:23Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md index e799b0e9..6c747499 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md +++ 
b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md @@ -5,8 +5,7 @@ MobileNet-v3-Large is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MobileNet-v3-Large-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_large_quantized). diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index 32289511..ccd611ba 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 346.0 - throughput: 2890.173410404624 + inference_time: 341.0 + throughput: 2932.551319648094 estimated_peak_memory_range: min: 12288 - max: 18380304 + max: 133354544 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jprv28v9g + job_id: jp2k7z6qp job_status: Passed torchscript_onnx_qnn: - inference_time: 630.0 - throughput: 1587.3015873015872 + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: min: 16384 - max: 127750744 + max: 14831096 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,75 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgjv2zvxg - job_status: Passed - torchscript_onnx: - inference_time: 655.0 - throughput: 1526.7175572519084 - estimated_peak_memory_range: - min: 12288 - max: 
12095024 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jprv2869g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:49:04Z' - - torchscript_onnx_tflite: - inference_time: 348.0 - throughput: 2873.5632183908046 - estimated_peak_memory_range: - min: 12288 - max: 4131504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 137 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 137 - job_id: jp2k90k4p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 630.0 - throughput: 1587.3015873015872 - estimated_peak_memory_range: - min: 12288 - max: 14866424 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 145 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 145 - job_id: jpedwed15 - job_status: Passed - torchscript_onnx: - inference_time: 654.0 - throughput: 1529.051987767584 - estimated_peak_memory_range: - min: 12288 - max: 12108032 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jp2k90x4p + job_id: jgjv084eg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:49:05Z' + timestamp: '2024-11-09T23:44:40Z' - torchscript_onnx_tflite: - inference_time: 239.0 - throughput: 4184.100418410042 + inference_time: 242.0 + throughput: 4132.231404958678 estimated_peak_memory_range: - min: 12288 - max: 54933648 + min: 29814784 + max: 84050384 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jpy1jr17p + job_id: jpy14ywlp job_status: 
Passed torchscript_onnx_qnn: - inference_time: 456.0 - throughput: 2192.9824561403507 + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 163840 - max: 19077584 + min: 0 + max: 18407728 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgz3jo3k5 - job_status: Passed - torchscript_onnx: - inference_time: 560.0 - throughput: 1785.7142857142858 - estimated_peak_memory_range: - min: 12288 - max: 85586624 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jpy1jrz7p + job_id: jpedrn3v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:49:07Z' + timestamp: '2024-11-09T23:44:41Z' - torchscript_onnx_tflite: - inference_time: 209.0 - throughput: 4784.688995215311 + inference_time: 247.0 + throughput: 4048.582995951417 estimated_peak_memory_range: - min: 12288 - max: 31331936 + min: 8192 + max: 31161936 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgo2nk2xp + job_id: jp0z1xqn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 457.0 - throughput: 2188.183807439825 + inference_time: 380.0 + throughput: 2631.5789473684213 estimated_peak_memory_range: - min: 8192 - max: 13963376 + min: 0 + max: 13805088 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5mn6qewp - job_status: Passed - torchscript_onnx: - inference_time: 437.0 - throughput: 2288.329519450801 - estimated_peak_memory_range: - min: 8192 - max: 38873712 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - 
layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jp8qm02xp + job_id: jgz3x0kx5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:49:11Z' + timestamp: '2024-11-09T23:44:43Z' - torchscript_onnx_tflite: - inference_time: 1276.0 - throughput: 783.6990595611285 + inference_time: 1110.0 + throughput: 900.9009009009009 estimated_peak_memory_range: - min: 16384 - max: 33522448 + min: 12288 + max: 33158192 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp0z23z65 + job_id: jp8q3k9op job_status: Passed torchscript_onnx_qnn: - inference_time: 1672.0 - throughput: 598.0861244019138 + inference_time: 1700.0 + throughput: 588.2352941176471 estimated_peak_memory_range: - min: 12288 - max: 8138512 + min: 28672 + max: 8434352 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5we32w65 + job_id: j5wedrnm5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:48:48Z' + timestamp: '2024-11-09T23:44:25Z' - torchscript_onnx_tflite: - inference_time: 6801.0 - throughput: 147.03720041170416 + inference_time: 6713.0 + throughput: 148.96469536719798 estimated_peak_memory_range: - min: 45056 - max: 9044104 + min: 53248 + max: 3002368 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp8qm0qxp + job_id: jgkelknng job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:48:29Z' + timestamp: 
'2024-11-09T23:44:08Z' - torchscript_onnx_tflite: - inference_time: 349.0 - throughput: 2865.3295128939826 + inference_time: 346.0 + throughput: 2890.173410404624 estimated_peak_memory_range: min: 12288 - max: 1401600 + max: 1344024 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgkeq7e2g + job_id: j5q67dkop job_status: Passed torchscript_onnx_qnn: - inference_time: 578.0 - throughput: 1730.1038062283737 + inference_time: 577.0 + throughput: 1733.102253032929 estimated_peak_memory_range: - min: 184320 - max: 1475080 + min: 176128 + max: 1446544 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jg9lyj0lg + job_id: jg9l3qe8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:48:50Z' + timestamp: '2024-11-09T23:44:27Z' - torchscript_onnx_tflite: inference_time: 341.0 throughput: 2932.551319648094 estimated_peak_memory_range: - min: 16384 - max: 1612184 + min: 0 + max: 115172240 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp14wxz8p + job_id: jglv0qrm5 job_status: Passed torchscript_onnx_qnn: inference_time: 580.0 throughput: 1724.1379310344828 estimated_peak_memory_range: min: 184320 - max: 1550040 + max: 1553136 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpxk62o35 + job_id: jgdxrmlzp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:06:34Z' + timestamp: '2024-11-09T23:44:31Z' - torchscript_onnx_tflite: 
inference_time: 348.0 throughput: 2873.5632183908046 estimated_peak_memory_range: min: 12288 - max: 115281696 + max: 3435672 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgdxql1rp + job_id: j56y30lyp job_status: Passed torchscript_onnx_qnn: - inference_time: 581.0 - throughput: 1721.170395869191 + inference_time: 578.0 + throughput: 1730.1038062283737 estimated_peak_memory_range: - min: 221184 - max: 1460728 + min: 184320 + max: 1579680 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5mn6yxdp + job_id: j5wedrn45 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:06:36Z' + timestamp: '2024-11-09T23:44:33Z' - torchscript_onnx_tflite: - inference_time: 348.0 - throughput: 2873.5632183908046 + inference_time: 339.0 + throughput: 2949.8525073746314 estimated_peak_memory_range: min: 16384 - max: 129041080 + max: 1467632 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp4ld0r85 + job_id: jp3j4r2ng job_status: Passed torchscript_onnx_qnn: - inference_time: 577.0 - throughput: 1733.102253032929 + inference_time: 579.0 + throughput: 1727.1157167530225 estimated_peak_memory_range: - min: 225280 - max: 1796256 + min: 188416 + max: 1543464 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jprv2j30g + job_id: jg9l3qemg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:06:39Z' + timestamp: '2024-11-09T23:44:34Z' - 
torchscript_onnx_tflite: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 801.0 + throughput: 1248.4394506866417 estimated_peak_memory_range: min: 12288 - max: 30502416 + max: 30518544 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: j57yl3rv5 + job_id: jgo219qkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1143.0 - throughput: 874.8906386701663 + inference_time: 1309.0 + throughput: 763.9419404125287 estimated_peak_memory_range: min: 0 - max: 5733568 + max: 5853792 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgn6m8vk5 + job_id: jp14dmxnp job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:06:38Z' + timestamp: '2024-11-09T23:44:36Z' - torchscript_onnx_tflite: - inference_time: 437.0 - throughput: 2288.329519450801 + inference_time: 445.0 + throughput: 2247.191011235955 estimated_peak_memory_range: min: 12288 - max: 55148576 + max: 54154176 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp3j1vjlg + job_id: jpv61nxr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 756.0 + throughput: 1322.7513227513227 estimated_peak_memory_range: min: 163840 - max: 21470592 + max: 19752960 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpxk6n915 + job_id: jgdxrml6p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:48:58Z' + timestamp: 
'2024-11-09T23:44:38Z' - torchscript_onnx_qnn: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 708.0 + throughput: 1412.4293785310736 estimated_peak_memory_range: - min: 692224 - max: 692224 + min: 544768 + max: 544768 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp14wy22p + job_id: jp14dmx7p job_status: Passed torchscript_onnx: - inference_time: 743.0 - throughput: 1345.8950201884254 + inference_time: 190244.0 + throughput: 5.256407560816635 estimated_peak_memory_range: - min: 10575872 - max: 10575872 + min: 56741888 + max: 56741888 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 134 + layers_on_npu: 339 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: jp0z23465 + layers_on_cpu: 62 + total_layers: 401 + job_id: j5mnwly7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +488,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:49:09Z' + timestamp: '2024-11-09T23:44:45Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/README.md b/qai_hub_models/models/mobilenet_v3_small/README.md index fc4a6a5c..0f389417 100644 --- a/qai_hub_models/models/mobilenet_v3_small/README.md +++ b/qai_hub_models/models/mobilenet_v3_small/README.md @@ -5,8 +5,7 @@ MobileNetV3Small is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of MobileNet-v3-Small found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/mobilenet_v3_small). diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 6de7c3e8..438ede5b 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 814.0 - throughput: 1228.5012285012285 + inference_time: 811.0 + throughput: 1233.0456226880394 estimated_peak_memory_range: - min: 12288 - max: 1392592 + min: 20480 + max: 1946448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jgz3rx4o5 + job_id: j5mnw0owp job_status: Passed torchscript_onnx_qnn: - inference_time: 868.0 - throughput: 1152.073732718894 + inference_time: 862.0 + throughput: 1160.092807424594 estimated_peak_memory_range: - min: 28672 - max: 187851864 + min: 20480 + max: 12781048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5weqd135 + job_id: jglv0yo85 job_status: Passed torchscript_onnx: - inference_time: 816.0 - throughput: 1225.4901960784314 + inference_time: 794.0 + throughput: 1259.4458438287154 estimated_peak_memory_range: - min: 12288 - max: 7196272 + min: 45056 + max: 7174960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jpxky7dl5 + job_id: jp14dnv2p job_status: Passed reference_device_info: 
name: Samsung Galaxy S23 @@ -92,36 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:41:34Z' - - torchscript_onnx_qnn: - inference_time: 834.0 - throughput: 1199.0407673860911 + timestamp: '2024-11-09T22:26:37Z' + - torchscript_onnx_tflite: + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 634880 - max: 1941608 + min: 16384 + max: 46968432 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 126 + layers_on_npu: 115 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 126 - job_id: jg9lw3xwg + total_layers: 115 + job_id: jgn69zor5 job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:41:25Z' - - torchscript_onnx_qnn: - inference_time: 555.0 - throughput: 1801.8018018018017 + torchscript_onnx_qnn: + inference_time: 574.0 + throughput: 1742.1602787456445 estimated_peak_memory_range: min: 0 - max: 8598336 + max: 15971984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +123,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14edv8p + job_id: j56y38r0p + job_status: Passed + torchscript_onnx: + inference_time: 562.0 + throughput: 1779.3594306049822 + estimated_peak_memory_range: + min: 0 + max: 49685968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jgdxrdzep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:41:26Z' - - torchscript_onnx_qnn: - inference_time: 496.0 - throughput: 2016.1290322580646 + timestamp: '2024-11-09T22:26:38Z' + - torchscript_onnx_tflite: + inference_time: 445.0 + throughput: 2247.191011235955 + 
estimated_peak_memory_range: + min: 12288 + max: 22275632 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: jprv4lo9g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 473.0 + throughput: 2114.164904862579 estimated_peak_memory_range: min: 0 - max: 11212816 + max: 11192400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,7 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdxorzrp + job_id: jp3j4zxlg + job_status: Passed + torchscript_onnx: + inference_time: 599.0 + throughput: 1669.449081803005 + estimated_peak_memory_range: + min: 0 + max: 22918064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: j57yje7l5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -161,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:41:27Z' - - torchscript_onnx_qnn: + timestamp: '2024-11-09T22:26:39Z' + - torchscript_onnx_tflite: + inference_time: 810.0 + throughput: 1234.567901234568 + estimated_peak_memory_range: + min: 12288 + max: 1325984 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: jp2k7r44p + job_status: Passed + torchscript_onnx_qnn: inference_time: 828.0 throughput: 1207.729468599034 estimated_peak_memory_range: min: 634880 - max: 1777704 + max: 1780672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5weqd1m5 + job_id: jgo21loxp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:41:28Z' + timestamp: 
'2024-11-09T22:26:30Z' - torchscript_onnx_tflite: - inference_time: 813.0 - throughput: 1230.0123001230013 + inference_time: 812.0 + throughput: 1231.527093596059 estimated_peak_memory_range: - min: 12288 - max: 2044840 + min: 28672 + max: 150111328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jgdxqwwep + job_id: jpy14oq7p job_status: Passed torchscript_onnx_qnn: - inference_time: 837.0 - throughput: 1194.7431302270013 + inference_time: 839.0 + throughput: 1191.8951132300358 estimated_peak_memory_range: - min: 634880 - max: 1929920 + min: 647168 + max: 2386856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpxk6vv15 + job_id: jgjv0roxg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:24:57Z' + timestamp: '2024-11-09T22:26:32Z' - torchscript_onnx_tflite: - inference_time: 808.0 - throughput: 1237.6237623762377 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 20480 - max: 1345480 + min: 28672 + max: 1396720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: j57ylzzl5 + job_id: jp0z1md65 job_status: Passed torchscript_onnx_qnn: - inference_time: 846.0 - throughput: 1182.033096926714 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 634880 - max: 2012504 + min: 626688 + max: 2259240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5mn6rrwp + job_id: jpedr7815 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +314,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:24:58Z' + timestamp: '2024-11-09T22:26:33Z' - torchscript_onnx_tflite: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 813.0 + throughput: 1230.0123001230013 estimated_peak_memory_range: - min: 28672 - max: 1574504 + min: 0 + max: 1552904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jp4ldqqv5 + job_id: jp8q3e6xp job_status: Passed torchscript_onnx_qnn: - inference_time: 844.0 - throughput: 1184.8341232227488 + inference_time: 842.0 + throughput: 1187.648456057007 estimated_peak_memory_range: - min: 626688 - max: 2263016 + min: 634880 + max: 1911016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgn6m22r5 + job_id: jgz3xl8k5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -298,13 +352,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:24:59Z' - - torchscript_onnx_qnn: - inference_time: 1158.0 - throughput: 863.5578583765113 + timestamp: '2024-11-09T22:26:34Z' + - torchscript_onnx_tflite: + inference_time: 1545.0 + throughput: 647.2491909385113 + estimated_peak_memory_range: + min: 12288 + max: 22483488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: jgkel2o2g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1689.0 + throughput: 592.0663114268798 estimated_peak_memory_range: - min: 3317760 - max: 20529056 + min: 618496 + max: 6514192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +381,45 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp4lvx915 + job_id: j5wedl165 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + 
form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:26:35Z' + - torchscript_onnx_tflite: + inference_time: 1110.0 + throughput: 900.9009009009009 + estimated_peak_memory_range: + min: 12288 + max: 48862320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 115 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 115 + job_id: j5q67lz4p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1182.0 + throughput: 846.0236886632825 + estimated_peak_memory_range: + min: 618496 + max: 17821280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jg9l3zxlg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:41:33Z' + timestamp: '2024-11-09T22:26:36Z' - torchscript_onnx_qnn: - inference_time: 987.0 - throughput: 1013.1712259371834 + inference_time: 979.0 + throughput: 1021.4504596527069 estimated_peak_memory_range: min: 602112 max: 602112 @@ -335,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lw3x8g + job_id: jpv61lej5 + job_status: Passed + torchscript_onnx: + inference_time: 843.0 + throughput: 1186.2396204033214 + estimated_peak_memory_range: + min: 7524352 + max: 7524352 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: jp4lxy9v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:41:29Z' + timestamp: '2024-11-09T22:26:40Z' diff --git a/qai_hub_models/models/openai_clip/README.md b/qai_hub_models/models/openai_clip/README.md index 07421d68..cc179b2f 100644 --- 
a/qai_hub_models/models/openai_clip/README.md +++ b/qai_hub_models/models/openai_clip/README.md @@ -5,8 +5,7 @@ Contrastive Language-Image Pre-Training (CLIP) uses a ViT like transformer to get visual features and a causal language model to get the text features. Both the text and visual features can then be used for a variety of zero-shot learning tasks. -This is based on the implementation of OpenAI-Clip found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/openai_clip). diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index f391999d..0324beae 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 5669.0 - throughput: 176.3979537837361 + inference_time: 5704.0 + throughput: 175.3155680224404 estimated_peak_memory_range: - min: 16384 - max: 2517176 + min: 28672 + max: 1946216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jglv307j5 + job_id: jgdxrdrlp job_status: Passed torchscript_onnx_qnn: - inference_time: 4848.0 - throughput: 206.27062706270627 + inference_time: 4730.0 + throughput: 211.41649048625794 estimated_peak_memory_range: - min: 12288 - max: 18497432 + min: 16384 + max: 24022440 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jp3je483g + job_id: j56y38q0p job_status: Passed torchscript_onnx: - inference_time: 35178.0 - throughput: 28.426857695150378 + inference_time: 35210.0 + throughput: 28.401022436807725 estimated_peak_memory_range: - min: 49152 - max: 136942632 + min: 61440 + max: 137015504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 508 - job_id: jgke3lowg + job_id: jp8q3e8xp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:41:03Z' + timestamp: '2024-11-09T22:25:43Z' - torchscript_onnx_tflite: - inference_time: 5814.0 - throughput: 171.9986240110079 + inference_time: 4080.0 + throughput: 245.09803921568627 estimated_peak_memory_range: min: 16384 - max: 2974720 + max: 204228272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jgn6l8wm5 + job_id: jg9l3z9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 4834.0 - throughput: 206.8680182043856 + inference_time: 3379.0 + throughput: 295.9455460195324 estimated_peak_memory_range: - min: 28672 - max: 1275472 + min: 12288 + max: 75205648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,22 +123,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jpv6v1ek5 + job_id: jgo21lexp + job_status: Passed + torchscript_onnx: + inference_time: 25241.0 + throughput: 39.61808169248445 + estimated_peak_memory_range: + min: 53248 + max: 562262672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 507 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 508 + job_id: j5q67lw4p job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + 
name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:40:47Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:25:45Z' - torchscript_onnx_tflite: - inference_time: 4939.0 - throughput: 202.47013565499088 + inference_time: 3991.0 + throughput: 250.56376847907794 estimated_peak_memory_range: - min: 16384 - max: 204794304 + min: 12288 + max: 114532160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jp2k0nzmp + job_id: jgdxrd7ep job_status: Passed torchscript_onnx_qnn: - inference_time: 3355.0 - throughput: 298.06259314456037 + inference_time: 2754.0 + throughput: 363.10820624546113 estimated_peak_memory_range: - min: 12288 - max: 8554432 + min: 8192 + max: 68454944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,30 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jpedkr8o5 + job_id: jgjv0rkxg job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:40:48Z' - - torchscript_onnx_qnn: - inference_time: 3256.0 - throughput: 307.12530712530713 + torchscript_onnx: + inference_time: 23811.0 + throughput: 41.99739616143799 estimated_peak_memory_range: - min: 8192 - max: 68321056 + min: 24576 + max: 329865168 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 507 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 445 - job_id: j5weqd835 + layers_on_cpu: 1 + total_layers: 508 + job_id: j56y38v0p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:40:50Z' + timestamp: '2024-11-09T22:25:48Z' - 
torchscript_onnx_tflite: - inference_time: 5738.0 - throughput: 174.2767514813524 + inference_time: 5664.0 + throughput: 176.5536723163842 estimated_peak_memory_range: - min: 28672 - max: 2136240 + min: 20480 + max: 1052640600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jp0z37xe5 + job_id: jp4lxyjv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4834.0 - throughput: 206.8680182043856 + inference_time: 4838.0 + throughput: 206.69698222405952 estimated_peak_memory_range: - min: 32768 - max: 1097056 + min: 36864 + max: 1233320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jp14ed78p + job_id: jgz3xlvk5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -229,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:40:52Z' + timestamp: '2024-11-09T22:25:29Z' - torchscript_onnx_tflite: - inference_time: 5714.0 - throughput: 175.00875043752188 + inference_time: 5761.0 + throughput: 173.58097552508247 estimated_peak_memory_range: min: 16384 - max: 2335720 + max: 2296704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: j5mn6rjwp + job_id: j5mnw0vwp job_status: Passed torchscript_onnx_qnn: - inference_time: 4884.0 - throughput: 204.75020475020474 + inference_time: 4832.0 + throughput: 206.95364238410596 estimated_peak_memory_range: - min: 32768 - max: 1263864 + min: 28672 + max: 1501000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -258,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jp8qmolxp + job_id: jgdxrd0ep job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -267,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
SA8255P Proxy - timestamp: '2024-10-30T01:24:32Z' + timestamp: '2024-11-09T22:25:33Z' - torchscript_onnx_tflite: - inference_time: 5672.0 - throughput: 176.30465444287728 + inference_time: 5675.0 + throughput: 176.21145374449338 estimated_peak_memory_range: min: 24576 - max: 2076968 + max: 2096776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -281,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jprv2kz9g + job_id: jprv4l19g job_status: Passed torchscript_onnx_qnn: - inference_time: 4833.0 - throughput: 206.9108214359611 + inference_time: 4864.0 + throughput: 205.5921052631579 estimated_peak_memory_range: min: 32768 - max: 1252976 + max: 1301672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -296,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: j5q6r8j4p + job_id: jp4lxy8v5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -305,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:24:34Z' + timestamp: '2024-11-09T22:25:35Z' - torchscript_onnx_tflite: - inference_time: 5829.0 - throughput: 171.556013038257 + inference_time: 5702.0 + throughput: 175.377060680463 estimated_peak_memory_range: - min: 28672 - max: 2428024 + min: 20480 + max: 2199552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -319,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jpy1je97p + job_id: jpy14ov7p job_status: Passed torchscript_onnx_qnn: - inference_time: 4777.0 - throughput: 209.33640360058615 + inference_time: 4893.0 + throughput: 204.37359493153485 estimated_peak_memory_range: - min: 28672 - max: 1340296 + min: 20480 + max: 1759456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -334,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: j56yz6k0p + job_id: j5mnw04wp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -343,13 +352,51 
@@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:24:35Z' + timestamp: '2024-11-09T22:25:37Z' + - torchscript_onnx_tflite: + inference_time: 7762.0 + throughput: 128.83277505797474 + estimated_peak_memory_range: + min: 16384 + max: 91729392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 658 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 660 + job_id: jp8q3ewxp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6779.0 + throughput: 147.5143826523086 + estimated_peak_memory_range: + min: 57344 + max: 5834992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 445 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 445 + job_id: jprv4l99g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:25:39Z' - torchscript_onnx_tflite: inference_time: 6582.0 throughput: 151.92950470981464 estimated_peak_memory_range: - min: 16384 - max: 175498752 + min: 65536 + max: 175124448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -357,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 660 - job_id: jpv60xnm5 + job_id: j5q67l94p job_status: Passed torchscript_onnx_qnn: - inference_time: 5311.0 - throughput: 188.28845791752966 + inference_time: 5291.0 + throughput: 189.000189000189 estimated_peak_memory_range: min: 12288 - max: 70434944 + max: 71082960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -372,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: jp0zl1d95 + job_id: jpy14on7p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -381,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:41:01Z' + timestamp: '2024-11-09T22:25:41Z' - torchscript_onnx_qnn: - inference_time: 5195.0 - 
throughput: 192.49278152069297 + inference_time: 5229.0 + throughput: 191.2411550965768 estimated_peak_memory_range: - min: 135168 - max: 135168 + min: 151552 + max: 151552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -395,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 445 - job_id: j57yxjkv5 + job_id: jg9l3z8lg + job_status: Passed + torchscript_onnx: + inference_time: 38327.0 + throughput: 26.09126725285047 + estimated_peak_memory_range: + min: 132689920 + max: 132689920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 507 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 508 + job_id: jgo21lmxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -404,15 +466,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:40:54Z' + timestamp: '2024-11-09T22:25:50Z' - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 36964.0 - throughput: 27.053349204631534 + inference_time: 36525.0 + throughput: 27.378507871321013 estimated_peak_memory_range: - min: 77824 - max: 2775448 + min: 32768 + max: 2793544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -420,14 +482,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: j56yn3v6p + job_id: j5wedlm65 job_status: Passed torchscript_onnx_qnn: - inference_time: 27216.0 - throughput: 36.74309229864785 + inference_time: 27189.0 + throughput: 36.77957997719666 estimated_peak_memory_range: - min: 0 - max: 59898984 + min: 40960 + max: 53294840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -435,22 +497,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jgo231mqp - job_status: Passed - torchscript_onnx: - inference_time: 174397.0 - throughput: 5.734043590199373 - estimated_peak_memory_range: - min: 147456 - max: 204070600 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 
501 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 502 - job_id: j5q637znp + job_id: jp3j4zqlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -459,13 +506,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:41:03Z' + timestamp: '2024-11-09T22:25:43Z' - torchscript_onnx_tflite: - inference_time: 37533.0 - throughput: 26.643220632510058 + inference_time: 29370.0 + throughput: 34.04834865509023 estimated_peak_memory_range: - min: 0 - max: 2308472 + min: 49152 + max: 698269056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -473,14 +520,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jprv8j7eg + job_id: jp14dnq2p job_status: Passed torchscript_onnx_qnn: - inference_time: 22225.0 - throughput: 44.99437570303712 + inference_time: 20849.0 + throughput: 47.96393112379491 estimated_peak_memory_range: - min: 692224 - max: 2013824 + min: 0 + max: 177907744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -488,22 +535,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jgjve0ovg + job_id: jpv61lzj5 + job_status: Passed + torchscript_onnx: + inference_time: 116721.0 + throughput: 8.567438592883885 + estimated_peak_memory_range: + min: 786432 + max: 3744817744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 501 + layers_on_gpu: 0 + layers_on_cpu: 1 + total_layers: 502 + job_id: jglv0y785 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:40:47Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:25:46Z' - torchscript_onnx_tflite: - inference_time: 28795.0 - throughput: 34.72825143254037 + inference_time: 25793.0 + throughput: 38.77020897142636 estimated_peak_memory_range: - min: 49152 - max: 699337056 + min: 
12288 + max: 483488016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -511,14 +573,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jpy1r0y4p + job_id: j57yjevl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 16789.0 - throughput: 59.562808982071594 + inference_time: 19399.0 + throughput: 51.549048920047426 estimated_peak_memory_range: - min: 667648 - max: 9229776 + min: 0 + max: 180136544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -526,30 +588,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jgz3rx8o5 + job_id: jpedr7415 job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:40:49Z' - - torchscript_onnx_qnn: - inference_time: 19207.0 - throughput: 52.06435153850159 + torchscript_onnx: + inference_time: 109572.0 + throughput: 9.126419158179097 estimated_peak_memory_range: - min: 643072 - max: 180806864 + min: 626688 + max: 2847535344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 501 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 438 - job_id: jg9lw3kwg + layers_on_cpu: 1 + total_layers: 502 + job_id: jp3j4z8lg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -558,13 +612,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:40:51Z' + timestamp: '2024-11-09T22:25:48Z' - torchscript_onnx_tflite: - inference_time: 37131.0 - throughput: 26.931674342193855 + inference_time: 36934.0 + throughput: 27.075323550116423 estimated_peak_memory_range: - min: 2998272 - max: 4942704 + min: 65536 + max: 2216840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -572,14 +626,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jp8q0vk8p + job_id: jpxk7le15 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 21966.0 - throughput: 45.524902121460435 + inference_time: 21964.0 + throughput: 45.52904753232562 estimated_peak_memory_range: - min: 733184 - max: 2002848 + min: 749568 + max: 1836256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -587,7 +641,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jgdxor8rp + job_id: j5wedlx65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -596,13 +650,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:40:53Z' + timestamp: '2024-11-09T22:25:29Z' - torchscript_onnx_tflite: - inference_time: 37524.0 - throughput: 26.649610915680633 + inference_time: 36858.0 + throughput: 27.13115198871344 estimated_peak_memory_range: - min: 73728 - max: 2544424 + min: 32768 + max: 2097336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -610,14 +664,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jgn6m2jr5 + job_id: jgn69zrr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 22387.0 - throughput: 44.66878098896681 + inference_time: 22523.0 + throughput: 44.39905873995471 estimated_peak_memory_range: - min: 667648 - max: 1960256 + min: 679936 + max: 2334352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -625,7 +679,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jgkeqzj2g + job_id: j57yje6l5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -634,13 +688,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:24:32Z' + timestamp: '2024-11-09T22:25:33Z' - torchscript_onnx_tflite: - inference_time: 36928.0 - throughput: 27.079722703639515 + inference_time: 37002.0 + throughput: 27.025566185611588 estimated_peak_memory_range: - min: 106496 - max: 2807408 + min: 77824 + max: 3481816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -648,14 +702,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jp2k9824p + job_id: jp2k7r34p job_status: Passed torchscript_onnx_qnn: - inference_time: 22527.0 - throughput: 44.39117503440316 + inference_time: 22519.0 + throughput: 44.40694524623651 estimated_peak_memory_range: - min: 712704 - max: 1974640 + min: 692224 + max: 1909264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -663,7 +717,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jglv2nj85 + job_id: jpxk7lm15 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -672,13 +726,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:24:34Z' + timestamp: '2024-11-09T22:25:35Z' - torchscript_onnx_tflite: - inference_time: 36939.0 - throughput: 27.071658680527356 + inference_time: 37107.0 + throughput: 26.949093163015064 estimated_peak_memory_range: - min: 81920 - max: 2532944 + min: 606208 + max: 3166152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -686,14 +740,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jp0z2yn65 + job_id: jp0z1me65 job_status: Passed torchscript_onnx_qnn: - inference_time: 22300.0 - throughput: 44.84304932735426 + inference_time: 22501.0 + throughput: 44.44246922359006 estimated_peak_memory_range: - min: 712704 - max: 2002784 + min: 671744 + max: 2032432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -701,7 +755,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jp3j1kylg + job_id: jgn69zxr5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -710,13 +764,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:24:36Z' + timestamp: '2024-11-09T22:25:38Z' - torchscript_onnx_tflite: - inference_time: 36811.0 - throughput: 27.16579283366385 + inference_time: 42408.0 + throughput: 23.580456517638183 estimated_peak_memory_range: - min: 192512 - max: 575028864 + 
min: 69632 + max: 376753424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -724,14 +778,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 659 - job_id: jgjvz488g + job_id: jp14dro2p job_status: Passed torchscript_onnx_qnn: - inference_time: 31063.0 - throughput: 32.19264076232173 + inference_time: 26489.0 + throughput: 37.751519498659825 + estimated_peak_memory_range: + min: 643072 + max: 6233248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 438 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 438 + job_id: jp2k7rj4p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-11T13:56:55Z' + - torchscript_onnx_tflite: + inference_time: 36678.0 + throughput: 27.26430012541578 + estimated_peak_memory_range: + min: 315392 + max: 573649232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 659 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 659 + job_id: jglv0ye85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 31003.0 + throughput: 32.25494307002548 estimated_peak_memory_range: min: 0 - max: 177051984 + max: 173061584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -739,7 +831,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 438 - job_id: jp8qz36kp + job_id: jp0z1mk65 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -748,21 +840,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:41:02Z' - - torchscript_onnx_qnn: - inference_time: 22149.0 - throughput: 45.14876518127229 + timestamp: '2024-11-09T22:25:42Z' + - torchscript_onnx: + inference_time: 161714.0 + throughput: 6.183756508403725 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 196730880 + max: 196730880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + 
layers_on_npu: 501 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 438 - job_id: jp4lvxm85 + layers_on_cpu: 1 + total_layers: 502 + job_id: jpv61l4j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -771,4 +863,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:40:54Z' + timestamp: '2024-11-09T22:25:50Z' diff --git a/qai_hub_models/models/openpose/README.md b/qai_hub_models/models/openpose/README.md index 601af4de..62ebe8ec 100644 --- a/qai_hub_models/models/openpose/README.md +++ b/qai_hub_models/models/openpose/README.md @@ -5,8 +5,7 @@ OpenPose is a machine learning model that estimates body and hand pose in an image and returns location and confidence for each of 19 joints. -This is based on the implementation of OpenPose found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/openpose). 
diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 88c569f4..e22c83f6 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 11719.0 - throughput: 85.3315129277242 + inference_time: 11803.0 + throughput: 84.72422265525714 estimated_peak_memory_range: min: 212992 - max: 2438976 + max: 2246920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpxkn20j5 + job_id: jpedr7v05 job_status: Passed torchscript_onnx_qnn: - inference_time: 11949.0 - throughput: 83.68901163277262 + inference_time: 11850.0 + throughput: 84.38818565400844 estimated_peak_memory_range: - min: 622592 - max: 225615216 + min: 647168 + max: 235954016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5q6eo17p + job_id: j5mnw0wqp job_status: Passed torchscript_onnx: - inference_time: 11989.0 - throughput: 83.40979230961715 + inference_time: 12034.0 + throughput: 83.09788931361143 estimated_peak_memory_range: - min: 32768 - max: 1193082328 + min: 16384 + max: 118870080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jg9lje2qg + job_id: jp3j4z4zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:39:13Z' + timestamp: 
'2024-11-09T22:24:22Z' - torchscript_onnx_tflite: - inference_time: 12005.0 - throughput: 83.29862557267805 + inference_time: 8698.0 + throughput: 114.96895838123707 estimated_peak_memory_range: - min: 229376 - max: 2334840 + min: 225280 + max: 42021712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5mnqy9yp + job_id: jgz3xl765 job_status: Passed torchscript_onnx_qnn: - inference_time: 11881.0 - throughput: 84.167999326656 + inference_time: 8756.0 + throughput: 114.20740063956144 estimated_peak_memory_range: min: 622592 - max: 235837704 + max: 18680240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jglv6r8e5 + job_id: jprv4l4eg job_status: Passed torchscript_onnx: - inference_time: 12066.0 - throughput: 82.8775070445881 + inference_time: 8802.0 + throughput: 113.61054305839582 estimated_peak_memory_range: - min: 16384 - max: 119228008 + min: 1159168 + max: 48619888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp14yx1kp + job_id: jgo21l1dp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:39:14Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:24:23Z' - torchscript_onnx_tflite: - inference_time: 11402.0 - throughput: 87.70391159445711 + inference_time: 8654.0 + throughput: 115.5535012710885 estimated_peak_memory_range: - min: 192512 - max: 41548096 + min: 188416 + max: 23873360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgn6l8qv5 + job_id: j5wedl9j5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 11473.0 - throughput: 87.16116098666434 + inference_time: 8714.0 + throughput: 114.75786091347257 estimated_peak_memory_range: - min: 643072 - max: 18812016 + min: 614400 + max: 15934528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j56yelmvp + job_id: jgdxrj6ep job_status: Passed torchscript_onnx: - inference_time: 11505.0 - throughput: 86.91873098652759 + inference_time: 8756.0 + throughput: 114.20740063956144 estimated_peak_memory_range: min: 0 - max: 46876688 + max: 27241216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgdxel4kp + job_id: jpv61l1m5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:39:15Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-11T13:57:06Z' - torchscript_onnx_tflite: - inference_time: 8656.0 - throughput: 115.5268022181146 + inference_time: 11651.0 + throughput: 85.82954252853833 estimated_peak_memory_range: - min: 188416 - max: 25046336 + min: 200704 + max: 3358088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgke7m0yg + job_id: jg9l3z4vg job_status: Passed torchscript_onnx_qnn: - inference_time: 8706.0 - throughput: 114.86331265793706 + inference_time: 12100.0 + throughput: 82.64462809917356 estimated_peak_memory_range: - min: 614400 - max: 16141840 + min: 643072 + max: 1812224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5we2nzz5 - job_status: Passed - torchscript_onnx: - 
inference_time: 8756.0 - throughput: 114.20740063956144 - estimated_peak_memory_range: - min: 0 - max: 27881296 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 189 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 189 - job_id: jg9lje2vg + job_id: jpy14o44p job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:39:16Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:24:14Z' - torchscript_onnx_tflite: - inference_time: 11640.0 - throughput: 85.91065292096219 + inference_time: 11775.0 + throughput: 84.92569002123142 estimated_peak_memory_range: - min: 204800 - max: 2251440 + min: 208896 + max: 2330640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jprv8jdvg + job_id: jp14dn8lp job_status: Passed torchscript_onnx_qnn: - inference_time: 12082.0 - throughput: 82.76775368316504 + inference_time: 12167.0 + throughput: 82.18952905399853 estimated_peak_memory_range: - min: 634880 - max: 1745376 + min: 679936 + max: 2327128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp3jv27xg + job_id: jp8q3e38p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:39:04Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:24:17Z' - torchscript_onnx_tflite: - inference_time: 11743.0 - throughput: 85.15711487694797 + inference_time: 11800.0 + throughput: 84.7457627118644 estimated_peak_memory_range: - min: 196608 - max: 1980944 + min: 208896 + max: 2089896 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpedw9q15 + job_id: jgdxrdvlp job_status: Passed torchscript_onnx_qnn: - inference_time: 12191.0 - throughput: 82.02772537117546 + inference_time: 12096.0 + throughput: 82.67195767195767 estimated_peak_memory_range: - min: 630784 - max: 1837464 + min: 663552 + max: 2011456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jg9lyv6lg + job_id: jgkel2log job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:23:44Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:24:18Z' - torchscript_onnx_tflite: - inference_time: 11682.0 - throughput: 85.60178051703475 + inference_time: 11747.0 + throughput: 85.12811781731506 estimated_peak_memory_range: - min: 0 - max: 2283912 + min: 204800 + max: 2347504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgz3je6k5 + job_id: j57yjejr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12120.0 - throughput: 82.50825082508251 + inference_time: 12115.0 + throughput: 82.54230293025175 estimated_peak_memory_range: - min: 397312 - max: 1674864 + min: 638976 + max: 1902544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp14w0r2p + job_id: j5q67l7mp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:23:45Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:24:19Z' - torchscript_onnx_tflite: - 
inference_time: 11685.0 - throughput: 85.57980316645272 + inference_time: 26584.0 + throughput: 37.61661149563648 estimated_peak_memory_range: min: 208896 - max: 1980128 + max: 24066864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5we3oj65 + job_id: jp4lxyxl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12100.0 - throughput: 82.64462809917356 + inference_time: 25854.0 + throughput: 38.67873443180939 estimated_peak_memory_range: - min: 638976 - max: 2295120 + min: 700416 + max: 6330304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgdxqwjep + job_id: jglv0y0l5 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:23:46Z' + chipset: SA8295P + timestamp: '2024-11-09T22:24:20Z' - torchscript_onnx_tflite: - inference_time: 23450.0 - throughput: 42.643923240938165 + inference_time: 23393.0 + throughput: 42.74783054759971 estimated_peak_memory_range: - min: 237568 - max: 43654464 + min: 278528 + max: 43579712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp8q0vrzp + job_id: jpxk7l795 job_status: Passed torchscript_onnx_qnn: - inference_time: 23734.0 - throughput: 42.13364793123789 + inference_time: 23712.0 + throughput: 42.172739541160595 estimated_peak_memory_range: - min: 638976 - max: 19982736 + min: 618496 + max: 18925200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgz3okyz5 + job_id: j56y3837p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:39:10Z' + timestamp: '2024-11-09T22:24:21Z' - torchscript_onnx_qnn: - inference_time: 12717.0 - throughput: 78.63489816780687 + inference_time: 12657.0 + throughput: 79.0076637433831 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgo2kqw4p + job_id: jp0z1m1e5 job_status: Passed torchscript_onnx: - inference_time: 12526.0 - throughput: 79.83394539358135 + inference_time: 12508.0 + throughput: 79.94883274704189 estimated_peak_memory_range: - min: 107786240 - max: 107786240 + min: 106549248 + max: 106549248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j5we2nzj5 + job_id: jgjv0r08g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:39:15Z' + timestamp: '2024-11-09T22:24:25Z' diff --git a/qai_hub_models/models/plamo_1b_quantized/README.md b/qai_hub_models/models/plamo_1b_quantized/README.md index fe3322ea..4fbae336 100644 --- a/qai_hub_models/models/plamo_1b_quantized/README.md +++ b/qai_hub_models/models/plamo_1b_quantized/README.md @@ -5,8 +5,7 @@ PLaMo-1B is the first small language model (SLM) in the PLaMo™ Lite series from Preferred Networks (PFN), designed to power AI applications for edge devices including mobile, automotive, and robots across various industrial sectors. This model builds on the advancements of PLaMo-100B, a 100-billion parameter large language model (LLM) developed from the ground up by PFN’s subsidiary Preferred Elements (PFE). Leveraging high-quality Japanese and English text data generated by PLaMo-100B, PLaMo-1B has been pre-trained on a total of 4 trillion tokens. 
As a result, it delivers exceptional performance in Japanese benchmarks, outperforming other SLMs with similar parameter sizes. In evaluations such as Jaster 0-shot and 4-shot, PLaMo-1B has demonstrated performance on par with larger LLMs, making it a highly efficient solution for edge-based AI tasks. -This is based on the implementation of PLaMo-1B found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/plamo_1b_quantized). diff --git a/qai_hub_models/models/posenet_mobilenet/README.md b/qai_hub_models/models/posenet_mobilenet/README.md index 7d2a086d..c55c003c 100644 --- a/qai_hub_models/models/posenet_mobilenet/README.md +++ b/qai_hub_models/models/posenet_mobilenet/README.md @@ -5,8 +5,7 @@ Posenet performs pose estimation on human images. -This is based on the implementation of Posenet-Mobilenet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/posenet_mobilenet). 
diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index caab5ded..830c6fd5 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Posenet-Mobilenet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1373.0 - throughput: 728.3321194464676 + inference_time: 1368.0 + throughput: 730.9941520467836 estimated_peak_memory_range: - min: 12288 - max: 172104616 + min: 20480 + max: 1580080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgz3rxwo5 + job_id: jp3j4zezg job_status: Passed torchscript_onnx_qnn: - inference_time: 1455.0 - throughput: 687.2852233676975 + inference_time: 1443.0 + throughput: 693.000693000693 estimated_peak_memory_range: - min: 16384 - max: 12937008 + min: 20480 + max: 165049264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j5weqdx35 + job_id: jgdxrdolp job_status: Passed torchscript_onnx: - inference_time: 1900.0 - throughput: 526.3157894736842 + inference_time: 1892.0 + throughput: 528.5412262156448 estimated_peak_memory_range: - min: 24576 - max: 155321040 + min: 12288 + max: 7851640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp2kl7jrp + job_id: jp8q3e78p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,51 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-31T14:40:03Z' - - torchscript_onnx_qnn: - inference_time: 1402.0 - throughput: 713.2667617689016 - estimated_peak_memory_range: - min: 1617920 - max: 3055624 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 69 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 69 - job_id: jg9lw38wg - job_status: Passed - torchscript_onnx: - inference_time: 1887.0 - throughput: 529.9417064122946 + timestamp: '2024-11-09T22:23:36Z' + - torchscript_onnx_tflite: + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: - min: 16384 - max: 8121768 + min: 12288 + max: 42379248 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 70 + layers_on_npu: 41 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 70 - job_id: jpede3675 + total_layers: 41 + job_id: jgo21l3dp job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:39:54Z' - - torchscript_onnx_qnn: - inference_time: 1000.0 - throughput: 1000.0 + torchscript_onnx_qnn: + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: min: 0 - max: 8964896 + max: 14734800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp14ed38p + job_id: j57yjedr5 job_status: Passed torchscript_onnx: - inference_time: 1454.0 - throughput: 687.757909215956 + inference_time: 1331.0 + throughput: 751.3148009015778 estimated_peak_memory_range: min: 0 - max: 48035600 + max: 48304912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgz3okzz5 + job_id: jgkel2yog job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 
- timestamp: '2024-10-31T14:39:55Z' - - torchscript_onnx_qnn: - inference_time: 832.0 - throughput: 1201.923076923077 + timestamp: '2024-11-09T22:23:37Z' + - torchscript_onnx_tflite: + inference_time: 966.0 + throughput: 1035.1966873706003 + estimated_peak_memory_range: + min: 8192 + max: 22641056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jpv61lvm5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1079.0 + throughput: 926.7840593141798 estimated_peak_memory_range: min: 0 - max: 13886112 + max: 13538144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgdxor0rp + job_id: jp4lxywl5 job_status: Passed torchscript_onnx: - inference_time: 1072.0 - throughput: 932.8358208955224 + inference_time: 1304.0 + throughput: 766.8711656441718 estimated_peak_memory_range: min: 0 - max: 25503280 + max: 25221424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jg9ljeoqg + job_id: j5q67l2mp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -206,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:39:56Z' - - torchscript_onnx_qnn: - inference_time: 1394.0 - throughput: 717.3601147776184 + timestamp: '2024-11-09T22:23:38Z' + - torchscript_onnx_tflite: + inference_time: 1367.0 + throughput: 731.528895391368 estimated_peak_memory_range: - min: 1617920 - max: 2939816 + min: 12288 + max: 1427296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jgjv0re8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1391.0 + throughput: 718.9072609633357 + estimated_peak_memory_range: + min: 1609728 + 
max: 3506456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j57yxj6v5 + job_id: jpxk7l195 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -229,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:39:57Z' + timestamp: '2024-11-09T22:23:28Z' - torchscript_onnx_tflite: - inference_time: 1372.0 - throughput: 728.862973760933 + inference_time: 1373.0 + throughput: 728.3321194464676 estimated_peak_memory_range: min: 24576 - max: 1648376 + max: 1469344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp8qmonxp + job_id: jpedr7k05 job_status: Passed torchscript_onnx_qnn: - inference_time: 1395.0 - throughput: 716.8458781362007 + inference_time: 1392.0 + throughput: 718.3908045977012 estimated_peak_memory_range: min: 1613824 - max: 2999032 + max: 2860384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -258,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jglv2nd85 + job_id: jgn69zem5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -267,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:23:20Z' + timestamp: '2024-11-09T22:23:30Z' - torchscript_onnx_tflite: - inference_time: 1370.0 - throughput: 729.92700729927 + inference_time: 1374.0 + throughput: 727.802037845706 estimated_peak_memory_range: - min: 24576 - max: 1372400 + min: 20480 + max: 9713616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -281,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgkeqz12g + job_id: jgz3xlr65 job_status: Passed torchscript_onnx_qnn: - inference_time: 1390.0 - throughput: 719.4244604316547 + inference_time: 1389.0 + throughput: 719.9424046076314 
estimated_peak_memory_range: - min: 28672 - max: 1348352 + min: 1613824 + max: 3028800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -296,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j56yz6x0p + job_id: jprv4lyeg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -305,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:23:21Z' + timestamp: '2024-11-09T22:23:31Z' - torchscript_onnx_tflite: - inference_time: 1378.0 - throughput: 725.6894049346879 + inference_time: 1371.0 + throughput: 729.3946024799417 estimated_peak_memory_range: - min: 12288 - max: 165928208 + min: 49152 + max: 14378672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -319,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5q6r8n4p + job_id: j5wedlqj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1392.0 - throughput: 718.3908045977012 + inference_time: 1396.0 + throughput: 716.3323782234957 estimated_peak_memory_range: - min: 16384 - max: 1238760 + min: 1638400 + max: 3005168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -334,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp3j1kdlg + job_id: jp2k7rmmp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -343,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:23:22Z' - - torchscript_onnx_qnn: - inference_time: 2291.0 - throughput: 436.4906154517678 + timestamp: '2024-11-09T22:23:32Z' + - torchscript_onnx_tflite: + inference_time: 2801.0 + throughput: 357.0153516601214 + estimated_peak_memory_range: + min: 12288 + max: 23061968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jg9l3zwvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3115.0 + 
throughput: 321.02728731942216 + estimated_peak_memory_range: + min: 0 + max: 5797536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 69 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 69 + job_id: jpy14od4p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:23:33Z' + - torchscript_onnx_tflite: + inference_time: 2189.0 + throughput: 456.82960255824577 + estimated_peak_memory_range: + min: 12288 + max: 43155536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jp14dnelp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2265.0 + throughput: 441.5011037527594 estimated_peak_memory_range: min: 1597440 - max: 21235648 + max: 21979696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -357,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jprve490g + job_id: jp0z1mre5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -366,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:40:02Z' + timestamp: '2024-11-09T22:23:35Z' - torchscript_onnx_qnn: - inference_time: 1555.0 - throughput: 643.0868167202573 + inference_time: 1609.0 + throughput: 621.5040397762585 estimated_peak_memory_range: min: 1589248 max: 1589248 @@ -380,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp4lvx885 + job_id: j5mnw0zqp job_status: Passed torchscript_onnx: - inference_time: 2198.0 - throughput: 454.9590536851683 + inference_time: 2175.0 + throughput: 459.7701149425287 estimated_peak_memory_range: - min: 7057408 - max: 7057408 + min: 6955008 + max: 6955008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -395,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 70 - job_id: j5we2nyz5 + job_id: jglv0ykl5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -404,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:39:58Z' + timestamp: '2024-11-09T22:23:39Z' diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/README.md b/qai_hub_models/models/posenet_mobilenet_quantized/README.md index a7622691..fa114867 100644 --- a/qai_hub_models/models/posenet_mobilenet_quantized/README.md +++ b/qai_hub_models/models/posenet_mobilenet_quantized/README.md @@ -5,8 +5,7 @@ Posenet performs pose estimation on human images. -This is based on the implementation of Posenet-Mobilenet-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized). 
diff --git a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml index 625b71d6..7c329b1a 100644 --- a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml @@ -18,6 +18,8 @@ aggregated: - Samsung Galaxy S21+ - Snapdragon X Elite CRD - Snapdragon X Plus 8-Core CRD + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -33,6 +35,7 @@ aggregated: - Snapdragon® 888 - Snapdragon® X Elite - Snapdragon® X Plus 8-Core + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -43,11 +46,11 @@ models: - name: Posenet-Mobilenet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 557.0 - throughput: 1795.3321364452424 + inference_time: 559.0 + throughput: 1788.9087656529516 estimated_peak_memory_range: min: 12288 - max: 2047040 + max: 2176056 primary_compute_unit: NPU precision: int8 layer_info: @@ -55,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: j5mn3wvdp + job_id: jp8q3em8p job_status: Passed torchscript_onnx_qnn: - inference_time: 635.0 - throughput: 1574.8031496062993 + inference_time: 633.0 + throughput: 1579.778830963665 estimated_peak_memory_range: - min: 20480 - max: 9613056 + min: 413696 + max: 5156176 primary_compute_unit: NPU precision: int8 layer_info: @@ -70,7 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgn639rk5 + job_id: j5wedl3j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -79,36 +82,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:39:27Z' - - torchscript_onnx_qnn: - inference_time: 566.0 - throughput: 1766.7844522968198 + timestamp: '2024-11-09T22:22:45Z' + - torchscript_onnx_tflite: + inference_time: 393.0 + throughput: 2544.529262086514 estimated_peak_memory_range: - min: 430080 - max: 
2169664 + min: 12288 + max: 51170000 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 69 + layers_on_npu: 48 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 69 - job_id: jprve410g + total_layers: 48 + job_id: jgkel2qog job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:39:28Z' - - torchscript_onnx_qnn: - inference_time: 396.0 - throughput: 2525.252525252525 + torchscript_onnx_qnn: + inference_time: 442.0 + throughput: 2262.443438914027 estimated_peak_memory_range: min: 0 - max: 8516192 + max: 21330352 primary_compute_unit: NPU precision: int8 layer_info: @@ -116,7 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp2kl73rp + job_id: jg9l3zyvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -125,13 +120,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:39:29Z' - - torchscript_onnx_qnn: - inference_time: 463.0 - throughput: 2159.827213822894 + timestamp: '2024-11-09T22:22:46Z' + - torchscript_onnx_tflite: + inference_time: 347.0 + throughput: 2881.844380403458 + estimated_peak_memory_range: + min: 8192 + max: 27903968 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 48 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 48 + job_id: j5q67lrmp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 392.0 + throughput: 2551.0204081632655 estimated_peak_memory_range: min: 0 - max: 17529888 + max: 17891248 primary_compute_unit: NPU precision: int8 layer_info: @@ -139,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jpy164v8p + job_id: jp14dnwlp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -148,13 +158,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - 
timestamp: '2024-10-31T14:39:30Z' - - torchscript_onnx_qnn: - inference_time: 2881.0 - throughput: 347.1017007983339 + timestamp: '2024-11-09T22:22:47Z' + - torchscript_onnx_tflite: + inference_time: 2191.0 + throughput: 456.41259698767686 estimated_peak_memory_range: min: 12288 - max: 7980080 + max: 28000144 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 48 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 48 + job_id: jglv0y2l5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2870.0 + throughput: 348.4320557491289 + estimated_peak_memory_range: + min: 425984 + max: 8665504 primary_compute_unit: NPU precision: int8 layer_info: @@ -162,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp0zl1e95 + job_id: jgdxrdqlp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -171,21 +196,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:39:31Z' - - reference_device_info: + timestamp: '2024-11-09T22:22:48Z' + - torchscript_onnx_tflite: + inference_time: 13626.0 + throughput: 73.38910905621606 + estimated_peak_memory_range: + min: 438272 + max: 7910536 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 45 + layers_on_gpu: 3 + layers_on_cpu: 0 + total_layers: 48 + job_id: j56y38z7p + job_status: Passed + reference_device_info: name: RB5 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:35:28Z' - - torchscript_onnx_qnn: - inference_time: 559.0 - throughput: 1788.9087656529516 + timestamp: '2024-11-09T22:22:38Z' + - torchscript_onnx_tflite: + inference_time: 557.0 + throughput: 1795.3321364452424 estimated_peak_memory_range: - min: 425984 - max: 1762864 + min: 12288 + max: 1405968 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 48 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 48 + job_id: 
jp3j4z1zg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 556.0 + throughput: 1798.5611510791366 + estimated_peak_memory_range: + min: 430080 + max: 1789896 primary_compute_unit: NPU precision: int8 layer_info: @@ -193,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp8qz3wkp + job_id: j57yjexr5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -202,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:39:32Z' + timestamp: '2024-11-09T22:22:50Z' - torchscript_onnx_tflite: - inference_time: 556.0 - throughput: 1798.5611510791366 + inference_time: 557.0 + throughput: 1795.3321364452424 estimated_peak_memory_range: - min: 12288 - max: 1409576 + min: 32768 + max: 1344800 primary_compute_unit: NPU precision: int8 layer_info: @@ -216,14 +271,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jgdxqwkep + job_id: jgo21lndp job_status: Passed torchscript_onnx_qnn: - inference_time: 560.0 - throughput: 1785.7142857142858 + inference_time: 563.0 + throughput: 1776.1989342806394 estimated_peak_memory_range: - min: 425984 - max: 1949392 + min: 450560 + max: 1787256 primary_compute_unit: NPU precision: int8 layer_info: @@ -231,7 +286,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j5mn6r7wp + job_id: jpxk7ly95 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -240,13 +295,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:23:01Z' + timestamp: '2024-11-09T22:22:52Z' - torchscript_onnx_tflite: inference_time: 556.0 throughput: 1798.5611510791366 estimated_peak_memory_range: min: 12288 - max: 9052808 + max: 101185744 primary_compute_unit: NPU precision: int8 layer_info: @@ -254,14 +309,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: j57ylzml5 + job_id: jpv61lrm5 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 562.0 - throughput: 1779.3594306049822 + inference_time: 555.0 + throughput: 1801.8018018018017 estimated_peak_memory_range: min: 430080 - max: 1669496 + max: 2116040 primary_compute_unit: NPU precision: int8 layer_info: @@ -269,7 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgn6m24r5 + job_id: j5mnw03qp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -278,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:23:02Z' + timestamp: '2024-11-09T22:22:53Z' - torchscript_onnx_tflite: - inference_time: 561.0 - throughput: 1782.5311942959001 + inference_time: 560.0 + throughput: 1785.7142857142858 estimated_peak_memory_range: min: 12288 - max: 1490072 + max: 1370016 primary_compute_unit: NPU precision: int8 layer_info: @@ -292,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jpxk6vq15 + job_id: jgjv0r28g job_status: Passed torchscript_onnx_qnn: - inference_time: 561.0 - throughput: 1782.5311942959001 + inference_time: 554.0 + throughput: 1805.0541516245487 estimated_peak_memory_range: - min: 446464 - max: 2127768 + min: 438272 + max: 1736472 primary_compute_unit: NPU precision: int8 layer_info: @@ -307,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp2k9814p + job_id: jgn69z3m5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -316,13 +371,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:23:04Z' + timestamp: '2024-11-09T22:22:54Z' - torchscript_onnx_tflite: - inference_time: 1202.0 - throughput: 831.9467554076539 + inference_time: 1212.0 + throughput: 825.0825082508251 estimated_peak_memory_range: min: 12288 - max: 25705008 + max: 25722928 primary_compute_unit: NPU precision: int8 layer_info: @@ -330,14 +385,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp4ldq7v5 + job_id: 
j57yjqol5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1533.0 - throughput: 652.3157208088714 + inference_time: 1275.0 + throughput: 784.3137254901961 estimated_peak_memory_range: min: 0 - max: 5634784 + max: 5814816 primary_compute_unit: NPU precision: int8 layer_info: @@ -345,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jprv2kr9g + job_id: jprv4leeg job_status: Passed reference_device_info: name: SA8295P ADP @@ -354,13 +409,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:23:03Z' - - torchscript_onnx_qnn: - inference_time: 796.0 - throughput: 1256.2814070351758 + timestamp: '2024-11-11T13:57:17Z' + - torchscript_onnx_tflite: + inference_time: 722.0 + throughput: 1385.0415512465374 + estimated_peak_memory_range: + min: 12288 + max: 51604272 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 48 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 48 + job_id: jgz3xlj65 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 804.0 + throughput: 1243.7810945273632 estimated_peak_memory_range: min: 409600 - max: 22617488 + max: 19801856 primary_compute_unit: NPU precision: int8 layer_info: @@ -368,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgo231eqp + job_id: jp2k7rlmp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -377,10 +447,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:39:37Z' + timestamp: '2024-11-09T22:22:57Z' - torchscript_onnx_qnn: - inference_time: 702.0 - throughput: 1424.5014245014245 + inference_time: 714.0 + throughput: 1400.5602240896358 estimated_peak_memory_range: min: 397312 max: 397312 @@ -391,7 +461,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgke3lrwg + job_id: jp4lxyvl5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +470,4 @@ models: 
os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:39:33Z' + timestamp: '2024-11-09T22:22:51Z' diff --git a/qai_hub_models/models/quicksrnetlarge/README.md b/qai_hub_models/models/quicksrnetlarge/README.md index 2ce082f7..c68bcece 100644 --- a/qai_hub_models/models/quicksrnetlarge/README.md +++ b/qai_hub_models/models/quicksrnetlarge/README.md @@ -5,8 +5,7 @@ QuickSRNet Large is designed for upscaling images on mobile platforms to sharpen in real-time. -This is based on the implementation of QuickSRNetLarge found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetlarge). diff --git a/qai_hub_models/models/quicksrnetlarge/evaluate.py b/qai_hub_models/models/quicksrnetlarge/evaluate.py new file mode 100644 index 00000000..9ff84685 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetlarge import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index ec96e953..1424aa58 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2515.0 - throughput: 397.61431411530816 + inference_time: 2405.0 + throughput: 415.8004158004158 estimated_peak_memory_range: - min: 28672 - max: 17885168 + min: 3932160 + max: 8007816 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jglv300j5 + job_id: jp8q3e0zp job_status: Passed torchscript_onnx_qnn: - inference_time: 2114.0 - throughput: 473.0368968779565 + inference_time: 2097.0 + throughput: 476.87172150691464 estimated_peak_memory_range: - min: 16384 - max: 76244248 + min: 12288 + max: 29581256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j56yn336p + job_id: jpedr7e75 job_status: Passed torchscript_onnx: - inference_time: 2738.0 - throughput: 365.23009495982467 + inference_time: 2731.0 + throughput: 366.16623947272063 estimated_peak_memory_range: min: 212992 - max: 1438824 + max: 1513512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jgdxor7rp + job_id: j57yjelr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,36 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:39:06Z' - - torchscript_onnx_qnn: - inference_time: 2196.0 - throughput: 455.3734061930783 + timestamp: '2024-11-09T22:22:08Z' + - torchscript_onnx_tflite: + inference_time: 1691.0 + throughput: 591.3660555884093 estimated_peak_memory_range: - min: 241664 - max: 1636632 + min: 20480 + max: 32228240 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 31 + layers_on_npu: 28 layers_on_gpu: 0 - layers_on_cpu: 0 + layers_on_cpu: 3 total_layers: 31 - job_id: jp3je443g + job_id: jgkel27yg job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:38:58Z' - - torchscript_onnx_qnn: - inference_time: 1756.0 - throughput: 569.4760820045558 + 
torchscript_onnx_qnn: + inference_time: 1501.0 + throughput: 666.2225183211193 estimated_peak_memory_range: min: 0 - max: 8700368 + max: 14460736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +123,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgo2311qp + job_id: jgz3xloz5 + job_status: Passed + torchscript_onnx: + inference_time: 2169.0 + throughput: 461.04195481788844 + estimated_peak_memory_range: + min: 0 + max: 36128832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 33 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 33 + job_id: jp4lxydl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:38:59Z' - - torchscript_onnx_qnn: - inference_time: 1532.0 - throughput: 652.7415143603133 + timestamp: '2024-11-09T22:22:09Z' + - torchscript_onnx_tflite: + inference_time: 1451.0 + throughput: 689.1798759476223 + estimated_peak_memory_range: + min: 12288 + max: 17105664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: j5q67le7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1577.0 + throughput: 634.1154090044388 estimated_peak_memory_range: min: 0 - max: 10793472 + max: 10276400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -152,7 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpv6v1zk5 + job_id: j5wedl2z5 + job_status: Passed + torchscript_onnx: + inference_time: 2287.0 + throughput: 437.25404459991256 + estimated_peak_memory_range: + min: 0 + max: 15620000 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 33 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 33 + job_id: jpxk7l695 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -161,13 
+200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:39:00Z' - - torchscript_onnx_qnn: - inference_time: 2193.0 - throughput: 455.99635202918375 + timestamp: '2024-11-09T22:22:10Z' + - torchscript_onnx_tflite: + inference_time: 2397.0 + throughput: 417.18815185648725 estimated_peak_memory_range: - min: 24576 - max: 3986304 + min: 16384 + max: 6051320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: jglv0y6e5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2181.0 + throughput: 458.50527281063734 + estimated_peak_memory_range: + min: 229376 + max: 4743848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgjve0kvg + job_id: jg9l3zjqg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:39:01Z' + timestamp: '2024-11-09T22:22:01Z' - torchscript_onnx_tflite: - inference_time: 2401.0 - throughput: 416.49312786339027 + inference_time: 2431.0 + throughput: 411.3533525298231 estimated_peak_memory_range: - min: 12288 - max: 9715272 + min: 16384 + max: 95305960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jglv2n985 + job_id: j56y38evp job_status: Passed torchscript_onnx_qnn: - inference_time: 2215.0 - throughput: 451.46726862302484 + inference_time: 2204.0 + throughput: 453.7205081669691 estimated_peak_memory_range: - min: 225280 - max: 1504080 + min: 212992 + max: 1394120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgo2ny7xp + job_id: jgdxrdekp job_status: Passed reference_device_info: 
name: SA8255 (Proxy) @@ -222,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:22:38Z' + timestamp: '2024-11-09T22:22:03Z' - torchscript_onnx_tflite: - inference_time: 2374.0 - throughput: 421.22999157540016 + inference_time: 2453.0 + throughput: 407.6640847941296 estimated_peak_memory_range: - min: 16384 - max: 2604064 + min: 28672 + max: 17983832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j56yz690p + job_id: jp3j4zvxg job_status: Passed torchscript_onnx_qnn: - inference_time: 2203.0 - throughput: 453.92646391284615 + inference_time: 2229.0 + throughput: 448.63167339614176 estimated_peak_memory_range: - min: 253952 - max: 1613088 + min: 229376 + max: 1537320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpv6r3yj5 + job_id: j5wedl2j5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:22:39Z' + timestamp: '2024-11-09T22:22:04Z' - torchscript_onnx_tflite: - inference_time: 2393.0 - throughput: 417.88549937317174 + inference_time: 2419.0 + throughput: 413.39396444811905 estimated_peak_memory_range: - min: 24576 - max: 2227352 + min: 4423680 + max: 25787192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jp3j1kllg + job_id: jgo21lk4p job_status: Passed torchscript_onnx_qnn: - inference_time: 2200.0 - throughput: 454.54545454545456 + inference_time: 2201.0 + throughput: 454.3389368468878 estimated_peak_memory_range: - min: 229376 - max: 1515824 + min: 225280 + max: 1552696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 31 - job_id: jgjv2x6xg + job_id: jg9l3zjvg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -298,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:22:40Z' - - torchscript_onnx_qnn: - inference_time: 3467.0 - throughput: 288.43380444188057 + timestamp: '2024-11-09T22:22:05Z' + - torchscript_onnx_tflite: + inference_time: 4985.0 + throughput: 200.60180541624874 estimated_peak_memory_range: - min: 212992 - max: 15560288 + min: 6307840 + max: 21984896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: jpv61l075 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4040.0 + throughput: 247.52475247524754 + estimated_peak_memory_range: + min: 0 + max: 5967744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jp14dnylp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:22:06Z' + - torchscript_onnx_tflite: + inference_time: 4143.0 + throughput: 241.3709872073377 + estimated_peak_memory_range: + min: 6311936 + max: 39495936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 28 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 31 + job_id: jgjv0rz7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3480.0 + throughput: 287.35632183908046 + estimated_peak_memory_range: + min: 208896 + max: 13943760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp14edq8p + job_id: jgdxrdelp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
QCS8450 Proxy - timestamp: '2024-10-31T14:39:06Z' + timestamp: '2024-11-09T22:22:07Z' - torchscript_onnx_qnn: - inference_time: 2588.0 - throughput: 386.39876352395675 + inference_time: 2438.0 + throughput: 410.17227235438884 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpedkr4o5 + job_id: jp14dnykp + job_status: Passed + torchscript_onnx: + inference_time: 2741.0 + throughput: 364.8303538854433 + estimated_peak_memory_range: + min: 8982528 + max: 8982528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 33 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 33 + job_id: j5mnw06qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:39:02Z' + timestamp: '2024-11-09T22:22:12Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/README.md b/qai_hub_models/models/quicksrnetlarge_quantized/README.md index 1c550ce8..91e5d259 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/README.md +++ b/qai_hub_models/models/quicksrnetlarge_quantized/README.md @@ -5,8 +5,7 @@ QuickSRNet Large is designed for upscaling images on mobile platforms to sharpen in real-time. -This is based on the implementation of QuickSRNetLarge-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetlarge_quantized). 
diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py b/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py index d980e4bf..f6c13e8d 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.quicksrnetlarge_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. @@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/evaluate.py b/qai_hub_models/models/quicksrnetlarge_quantized/evaluate.py new file mode 100644 index 00000000..845c2921 --- /dev/null +++ b/qai_hub_models/models/quicksrnetlarge_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetlarge_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index 2550379f..4be08f87 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.quicksrnetlarge_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ 
can_access_qualcomm_ai_hub, export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. 
@@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "quicksrnetlarge_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "bsd300", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. 
Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. 
Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index 297ae175..a318307b 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ -4,76 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, - constrain_quantized_inputs_to_image_range, -) - -# isort: on - -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetlarge.model import QuickSRNetLarge -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 - -DEFAULT_ENCODINGS = "quicksrnetlarge_quantized_encodings.json" - - -class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): - """QuickSRNetLarge with post train quantization support. - - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. - Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, quicksrnet_model: QuantizationSimModel, scale_factor: int - ) -> None: - QuickSRNetLarge.__init__(self, quicksrnet_model.model, scale_factor) - AIMETQuantizableMixin.__init__(self, quicksrnet_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - scale_factor: int = DEFAULT_SCALE_FACTOR, - ) -> QuickSRNetLargeQuantizable: - """ - Parameters: - aimet_encodings: - if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. - elif None: Doesn't load any encodings. Used when computing encodings. - else: Interprets as a filepath and loads the encodings stored there. 
- """ - # Load Model - fp16_model = QuickSRNetLarge.from_pretrained(scale_factor) - input_shape = cls.get_input_spec()["image"][0] - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - return cls(sim, scale_factor) +class QuickSRNetLargeQuantizable(HubQuantizableMixin, QuickSRNetLarge): + pass diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index be2500a0..9bce1fe7 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -20,6 +20,8 @@ aggregated: - Snapdragon X Plus 8-Core CRD - QCS6490 (Proxy) - RB3 Gen 2 (Proxy) + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) @@ -36,6 +38,7 @@ aggregated: - Snapdragon® X Elite - Snapdragon® X Plus 8-Core - QCS6490 Proxy + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy @@ -46,11 +49,11 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1479.0 - throughput: 676.132521974307 + inference_time: 1432.0 + throughput: 698.3240223463687 estimated_peak_memory_range: - min: 1597440 - max: 51685456 + min: 24576 + max: 1499744 primary_compute_unit: NPU precision: int8 layer_info: @@ -58,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp14edd2p + job_id: j57yj8z95 job_status: Passed torchscript_onnx_qnn: - inference_time: 905.0 - throughput: 
1104.9723756906078 + inference_time: 908.0 + throughput: 1101.3215859030836 estimated_peak_memory_range: - min: 12288 - max: 8566656 + min: 28672 + max: 8536128 primary_compute_unit: NPU precision: int8 layer_info: @@ -73,22 +76,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgdxorrep + job_id: j5q67d8op job_status: Passed torchscript_onnx: - inference_time: 905.0 - throughput: 1104.9723756906078 + inference_time: 19669.0 + throughput: 50.841425593573646 estimated_peak_memory_range: - min: 16384 - max: 16319128 + min: 24436736 + max: 26734072 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 34 + layers_on_npu: 117 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 34 - job_id: jpy16448p + layers_on_cpu: 27 + total_layers: 144 + job_id: jp14dml7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -97,36 +100,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:38:42Z' - - torchscript_onnx_qnn: - inference_time: 687.0 - throughput: 1455.604075691412 + timestamp: '2024-11-09T23:43:24Z' + - torchscript_onnx_tflite: + inference_time: 1120.0 + throughput: 892.8571428571429 estimated_peak_memory_range: - min: 0 - max: 1254544 + min: 20480 + max: 29504048 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 31 + layers_on_npu: 30 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 31 - job_id: j5weqdd35 + layers_on_cpu: 3 + total_layers: 33 + job_id: jp4lx2q15 job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:38:31Z' - - torchscript_onnx_qnn: - inference_time: 461.0 - throughput: 2169.1973969631235 + torchscript_onnx_qnn: + inference_time: 640.0 + throughput: 1562.5 estimated_peak_memory_range: min: 12288 - max: 8560544 + max: 14476032 primary_compute_unit: NPU precision: int8 
layer_info: @@ -134,7 +129,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jg9lw33wg + job_id: jglv0qzm5 + job_status: Passed + torchscript_onnx: + inference_time: 13988.0 + throughput: 71.4898484415213 + estimated_peak_memory_range: + min: 26128384 + max: 108561920 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 117 + layers_on_gpu: 0 + layers_on_cpu: 27 + total_layers: 144 + job_id: jgdxrm9zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -143,13 +153,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:38:32Z' - - torchscript_onnx_qnn: - inference_time: 679.0 - throughput: 1472.7540500736377 + timestamp: '2024-11-09T23:43:27Z' + - torchscript_onnx_tflite: + inference_time: 1334.0 + throughput: 749.6251874062968 + estimated_peak_memory_range: + min: 0 + max: 20711280 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jpxk7zvl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 652.0 + throughput: 1533.7423312883436 estimated_peak_memory_range: min: 8192 - max: 12122720 + max: 11556560 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +182,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp14edd8p + job_id: j56y30jyp + job_status: Passed + torchscript_onnx: + inference_time: 13222.0 + throughput: 75.63152321887763 + estimated_peak_memory_range: + min: 27811840 + max: 78645504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 117 + layers_on_gpu: 0 + layers_on_cpu: 27 + total_layers: 144 + job_id: j57yj8w95 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +206,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:38:33Z' - - torchscript_onnx_qnn: - inference_time: 3193.0 
- throughput: 313.18509238960223 + timestamp: '2024-11-09T23:43:30Z' + - torchscript_onnx_tflite: + inference_time: 4110.0 + throughput: 243.30900243309003 estimated_peak_memory_range: - min: 65536 - max: 8586240 + min: 1605632 + max: 22766592 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: j5mnwlr9p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3149.0 + throughput: 317.56113051762463 + estimated_peak_memory_range: + min: 16384 + max: 8292128 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgdxorrrp + job_id: jp3j4r3ng job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -189,21 +244,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:38:34Z' - - reference_device_info: + timestamp: '2024-11-09T23:43:01Z' + - torchscript_onnx_tflite: + inference_time: 38391.0 + throughput: 26.047771613138497 + estimated_peak_memory_range: + min: 1794048 + max: 3751856 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jgn69w2q5 + job_status: Passed + reference_device_info: name: RB5 (Proxy) os: '12' form_factor: Iot os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:33:55Z' - - torchscript_onnx_qnn: - inference_time: 679.0 - throughput: 1472.7540500736377 + timestamp: '2024-11-09T23:42:33Z' + - torchscript_onnx_tflite: + inference_time: 1432.0 + throughput: 698.3240223463687 estimated_peak_memory_range: - min: 81920 - max: 1594712 + min: 28672 + max: 1170160 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jprv47k7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 
678.0 + throughput: 1474.9262536873157 + estimated_peak_memory_range: + min: 114688 + max: 1563160 primary_compute_unit: NPU precision: int8 layer_info: @@ -211,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j57yxjjv5 + job_id: jgo2190kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -220,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:38:35Z' + timestamp: '2024-11-09T23:43:04Z' - torchscript_onnx_tflite: - inference_time: 1451.0 - throughput: 689.1798759476223 + inference_time: 1453.0 + throughput: 688.2312456985547 estimated_peak_memory_range: - min: 24576 - max: 1375144 + min: 1605632 + max: 3032248 primary_compute_unit: NPU precision: int8 layer_info: @@ -234,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: j57ylz1l5 + job_id: jp2k7z8qp job_status: Passed torchscript_onnx_qnn: - inference_time: 678.0 - throughput: 1474.9262536873157 + inference_time: 673.0 + throughput: 1485.8841010401188 estimated_peak_memory_range: - min: 73728 - max: 1450664 + min: 81920 + max: 1423688 primary_compute_unit: NPU precision: int8 layer_info: @@ -249,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgn6m2dr5 + job_id: jgjv08meg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -258,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:22:19Z' + timestamp: '2024-11-09T23:43:10Z' - torchscript_onnx_tflite: - inference_time: 1443.0 - throughput: 693.000693000693 + inference_time: 1447.0 + throughput: 691.0850034554251 estimated_peak_memory_range: - min: 12288 - max: 1391056 + min: 811008 + max: 2182024 primary_compute_unit: NPU precision: int8 layer_info: @@ -272,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp4ldq6v5 + job_id: jpy14yelp job_status: Passed torchscript_onnx_qnn: - 
inference_time: 680.0 - throughput: 1470.5882352941176 + inference_time: 679.0 + throughput: 1472.7540500736377 estimated_peak_memory_range: - min: 126976 - max: 2452864 + min: 81920 + max: 1490528 primary_compute_unit: NPU precision: int8 layer_info: @@ -287,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jprv2km9g + job_id: jpedrn1v5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -296,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:22:20Z' + timestamp: '2024-11-09T23:43:13Z' - torchscript_onnx_tflite: - inference_time: 1448.0 - throughput: 690.6077348066299 + inference_time: 1450.0 + throughput: 689.6551724137931 estimated_peak_memory_range: - min: 16384 - max: 6189032 + min: 12288 + max: 3117720 primary_compute_unit: NPU precision: int8 layer_info: @@ -310,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: j5mn6r1wp + job_id: jp0z1xyn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 683.0 - throughput: 1464.1288433382138 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: - min: 77824 - max: 1357048 + min: 86016 + max: 1835424 primary_compute_unit: NPU precision: int8 layer_info: @@ -325,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpy1jek7p + job_id: jgz3x09x5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -334,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:22:22Z' + timestamp: '2024-11-09T23:43:16Z' - torchscript_onnx_tflite: - inference_time: 2980.0 - throughput: 335.5704697986577 + inference_time: 2580.0 + throughput: 387.5968992248062 estimated_peak_memory_range: - min: 1589248 - max: 21479552 + min: 12288 + max: 19752272 primary_compute_unit: NPU precision: int8 layer_info: @@ -348,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 
total_layers: 33 - job_id: jpxk6v815 + job_id: jp8q3koop job_status: Passed torchscript_onnx_qnn: - inference_time: 1487.0 - throughput: 672.4949562878278 + inference_time: 1471.0 + throughput: 679.8096532970768 estimated_peak_memory_range: min: 12288 - max: 5423440 + max: 5828640 primary_compute_unit: NPU precision: int8 layer_info: @@ -363,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp2k98q4p + job_id: j5wedrvm5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -372,13 +457,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:22:21Z' - - torchscript_onnx_qnn: + timestamp: '2024-11-09T23:43:19Z' + - torchscript_onnx_tflite: + inference_time: 1885.0 + throughput: 530.5039787798408 + estimated_peak_memory_range: + min: 20480 + max: 30540000 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 30 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 33 + job_id: jgkelkzng + job_status: Passed + torchscript_onnx_qnn: inference_time: 1048.0 throughput: 954.1984732824427 estimated_peak_memory_range: - min: 65536 - max: 15662864 + min: 12288 + max: 14032448 primary_compute_unit: NPU precision: int8 layer_info: @@ -386,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp2kl77rp + job_id: jg9l3q18g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -395,13 +495,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:38:41Z' + timestamp: '2024-11-09T23:43:22Z' - torchscript_onnx_qnn: - inference_time: 834.0 - throughput: 1199.0407673860911 + inference_time: 794.0 + throughput: 1259.4458438287154 estimated_peak_memory_range: - min: 188416 - max: 188416 + min: 98304 + max: 98304 primary_compute_unit: NPU precision: int8 layer_info: @@ -409,7 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp4lvxx85 + job_id: jpv61nor5 + job_status: 
Passed + torchscript_onnx: + inference_time: 17516.0 + throughput: 57.09065996802923 + estimated_peak_memory_range: + min: 34996224 + max: 34996224 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 117 + layers_on_gpu: 0 + layers_on_cpu: 27 + total_layers: 144 + job_id: jp4lx2o15 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -418,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:38:36Z' + timestamp: '2024-11-09T23:43:32Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/test.py b/qai_hub_models/models/quicksrnetlarge_quantized/test.py deleted file mode 100644 index 81430c06..00000000 --- a/qai_hub_models/models/quicksrnetlarge_quantized/test.py +++ /dev/null @@ -1,65 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- - -import numpy as np -import pytest -import torch - -from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS -from qai_hub_models.models.quicksrnetlarge.model import MODEL_ASSET_VERSION, MODEL_ID -from qai_hub_models.models.quicksrnetlarge_quantized.demo import main as demo_main -from qai_hub_models.models.quicksrnetlarge_quantized.model import ( - QuickSRNetLargeQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_demo_output.png" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. 
- torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - model = QuickSRNetLargeQuantizable.from_pretrained() - app = SuperResolutionApp(model=model) - app_output_image = app.predict(image)[0] - - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@pytest.mark.trace -@skip_clone_repo_check -def test_trace(): - image = load_image(IMAGE_ADDRESS) - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - app = SuperResolutionApp( - QuickSRNetLargeQuantizable.from_pretrained().convert_to_torchscript() - ) - app_output_image = app.predict(image)[0] - - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetmedium/README.md b/qai_hub_models/models/quicksrnetmedium/README.md index 35ffd138..89724b8e 100644 --- a/qai_hub_models/models/quicksrnetmedium/README.md +++ b/qai_hub_models/models/quicksrnetmedium/README.md @@ -5,8 +5,7 @@ QuickSRNet Medium is designed for upscaling images on mobile platforms to sharpen in real-time. -This is based on the implementation of QuickSRNetMedium found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetmedium). 
diff --git a/qai_hub_models/models/quicksrnetmedium/evaluate.py b/qai_hub_models/models/quicksrnetmedium/evaluate.py new file mode 100644 index 00000000..1fa80f52 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetmedium import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index f7196260..4a4a4710 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ 
-22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1402.0 - throughput: 713.2667617689016 + inference_time: 1338.0 + throughput: 747.3841554559043 estimated_peak_memory_range: - min: 32768 - max: 8686720 + min: 24576 + max: 1383632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jp2kl7m4p + job_id: jprv4l6vg job_status: Passed torchscript_onnx_qnn: - inference_time: 1005.0 - throughput: 995.0248756218906 + inference_time: 996.0 + throughput: 1004.0160642570281 estimated_peak_memory_range: - min: 229376 - max: 62275816 + min: 221184 + max: 2594576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpy164d7p + job_id: jp3j4znxg job_status: Passed torchscript_onnx: - inference_time: 1514.0 - throughput: 660.5019815059445 + inference_time: 1479.0 + throughput: 676.132521974307 estimated_peak_memory_range: min: 212992 - max: 1674728 + max: 1635856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jpedkrv15 + job_id: j57yje2q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,36 +94,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:38:13Z' - - torchscript_onnx_qnn: - inference_time: 910.0 - throughput: 1098.901098901099 + timestamp: '2024-11-09T22:21:27Z' + - torchscript_onnx_tflite: + inference_time: 907.0 + throughput: 1102.5358324145534 estimated_peak_memory_range: - min: 233472 - max: 1822560 + 
min: 20480 + max: 22583264 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 17 + layers_on_npu: 14 layers_on_gpu: 0 - layers_on_cpu: 0 + layers_on_cpu: 3 total_layers: 17 - job_id: jp0zl1r65 + job_id: jp2k7rxxp job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:38:05Z' - - torchscript_onnx_qnn: - inference_time: 737.0 - throughput: 1356.85210312076 + torchscript_onnx_qnn: + inference_time: 648.0 + throughput: 1543.20987654321 estimated_peak_memory_range: min: 0 - max: 8664336 + max: 11497952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,7 +123,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp8qz37xp + job_id: jgo21lz4p + job_status: Passed + torchscript_onnx: + inference_time: 1069.0 + throughput: 935.4536950420954 + estimated_peak_memory_range: + min: 0 + max: 24307760 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 19 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 19 + job_id: jp4lxynq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -138,13 +147,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:38:05Z' - - torchscript_onnx_qnn: - inference_time: 712.0 - throughput: 1404.4943820224719 + timestamp: '2024-11-09T22:21:28Z' + - torchscript_onnx_tflite: + inference_time: 787.0 + throughput: 1270.6480304955528 estimated_peak_memory_range: - min: 0 - max: 8908656 + min: 12288 + max: 15572096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jpy14ozrp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 688.0 + throughput: 1453.4883720930231 + estimated_peak_memory_range: + min: 208896 + max: 8883936 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -152,7 +176,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgke3ly2g + job_id: jpv61lq75 + job_status: Passed + torchscript_onnx: + inference_time: 919.0 + throughput: 1088.139281828074 + estimated_peak_memory_range: + min: 0 + max: 16136224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 19 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 19 + job_id: jpxk7l9j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -161,13 +200,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:38:07Z' - - torchscript_onnx_qnn: - inference_time: 903.0 - throughput: 1107.4197120708748 + timestamp: '2024-11-09T22:21:29Z' + - torchscript_onnx_tflite: + inference_time: 1345.0 + throughput: 743.4944237918215 estimated_peak_memory_range: - min: 229376 - max: 1358632 + min: 24576 + max: 7107704 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jp0z1m425 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 926.0 + throughput: 1079.913606911447 + estimated_peak_memory_range: + min: 233472 + max: 1529272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -175,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j5q63724p + job_id: jgjv0rd7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:38:08Z' + timestamp: '2024-11-09T22:21:19Z' - torchscript_onnx_tflite: - inference_time: 1302.0 - throughput: 768.0491551459294 + inference_time: 1345.0 + throughput: 743.4944237918215 estimated_peak_memory_range: min: 20480 - max: 11374032 + max: 7363224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -198,14 +252,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 3 total_layers: 17 - job_id: jgz3je065 + job_id: jp8q3e2zp job_status: Passed torchscript_onnx_qnn: - inference_time: 933.0 - throughput: 1071.8113612004288 + inference_time: 926.0 + throughput: 1079.913606911447 estimated_peak_memory_range: - min: 221184 - max: 1547000 + min: 233472 + max: 1659872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -213,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp14w0mlp + job_id: jgz3xl2z5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -222,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:21:58Z' + timestamp: '2024-11-09T22:21:22Z' - torchscript_onnx_tflite: - inference_time: 1325.0 - throughput: 754.7169811320755 + inference_time: 1296.0 + throughput: 771.604938271605 estimated_peak_memory_range: min: 16384 - max: 8213624 + max: 6953544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -236,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j5we3orj5 + job_id: jgkel2vyg job_status: Passed torchscript_onnx_qnn: - inference_time: 940.0 - throughput: 1063.8297872340424 + inference_time: 926.0 + throughput: 1079.913606911447 estimated_peak_memory_range: - min: 229376 - max: 1590864 + min: 221184 + max: 1523496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgdxqwmlp + job_id: j5wedlwz5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:21:59Z' + timestamp: '2024-11-09T22:21:23Z' - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: min: 16384 - max: 8764904 + max: 2560056 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -274,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jg9lyvqvg + job_id: j5q67l07p job_status: Passed torchscript_onnx_qnn: - inference_time: 930.0 - throughput: 1075.268817204301 + inference_time: 920.0 + throughput: 1086.9565217391305 estimated_peak_memory_range: - min: 221184 - max: 1659168 + min: 24576 + max: 1376144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j5we3o065 + job_id: jg9l3z0qg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -298,13 +352,66 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:22:00Z' - - torchscript_onnx_qnn: - inference_time: 1281.0 - throughput: 780.64012490242 + timestamp: '2024-11-09T22:21:24Z' + - torchscript_onnx_tflite: + inference_time: 2265.0 + throughput: 441.5011037527594 estimated_peak_memory_range: - min: 212992 - max: 14473600 + min: 6307840 + max: 20981488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: jglv0y4e5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1730.0 + throughput: 578.0346820809249 + estimated_peak_memory_range: + min: 217088 + max: 6133824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 17 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 17 + job_id: jp14dn2kp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:21:25Z' + - torchscript_onnx_tflite: + inference_time: 1869.0 + throughput: 535.0454788657036 + estimated_peak_memory_range: + min: 20480 + max: 23366800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 14 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 17 + job_id: 
j56y382vp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1226.0 + throughput: 815.6606851549756 + estimated_peak_memory_range: + min: 204800 + max: 15319776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgjve0lxg + job_id: jgdxrdnkp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:38:13Z' + timestamp: '2024-11-09T22:21:26Z' - torchscript_onnx_qnn: - inference_time: 1015.0 - throughput: 985.2216748768473 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +442,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jglv30k85 + job_id: jpedr7o75 + job_status: Passed + torchscript_onnx: + inference_time: 1533.0 + throughput: 652.3157208088714 + estimated_peak_memory_range: + min: 8957952 + max: 8957952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 19 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 19 + job_id: j5mnw0eyp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:38:09Z' + timestamp: '2024-11-09T22:21:30Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/README.md b/qai_hub_models/models/quicksrnetmedium_quantized/README.md index 64ae7804..f3cde50b 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/README.md +++ b/qai_hub_models/models/quicksrnetmedium_quantized/README.md @@ -5,8 +5,7 @@ QuickSRNet Medium is designed for upscaling images on mobile platforms to sharpen in real-time. 
-This is based on the implementation of QuickSRNetMedium-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetmedium_quantized). diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py b/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py index c6ff94ea..bd110289 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.quicksrnetmedium_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. @@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/evaluate.py b/qai_hub_models/models/quicksrnetmedium_quantized/evaluate.py new file mode 100644 index 00000000..6c828eb5 --- /dev/null +++ b/qai_hub_models/models/quicksrnetmedium_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetmedium_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index 9580747f..b2790268 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.quicksrnetmedium_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 
+33,14 @@ can_access_qualcomm_ai_hub, export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. 
@@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "quicksrnetmedium_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "bsd300", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. 
Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. 
Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index 726a1f18..e72395d5 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -4,76 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, - constrain_quantized_inputs_to_image_range, -) - -# isort: on -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetmedium.model import QuickSRNetMedium -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 4 - -DEFAULT_ENCODINGS = "quicksrnetmedium_quantized_encodings.json" - - -class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): - """QuickSRNetMedium with post train quantization support. - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. - Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, - quicksrnet_model: QuantizationSimModel, - scale_factor: int, - ) -> None: - QuickSRNetMedium.__init__(self, quicksrnet_model.model, scale_factor) - AIMETQuantizableMixin.__init__(self, quicksrnet_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - scale_factor: int = DEFAULT_SCALE_FACTOR, - ) -> QuickSRNetMediumQuantizable: - """ - Parameters: - aimet_encodings: - if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. - elif None: Doesn't load any encodings. Used when computing encodings. - else: Interprets as a filepath and loads the encodings stored there. 
- """ - # Load Model - fp16_model = QuickSRNetMedium.from_pretrained(scale_factor) - input_shape = cls.get_input_spec()["image"][0] - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - return cls(sim, scale_factor) +class QuickSRNetMediumQuantizable(HubQuantizableMixin, QuickSRNetMedium): + pass diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index 432b4b21..fb12c036 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1120.0 - throughput: 892.8571428571429 + inference_time: 1118.0 + throughput: 894.4543828264758 estimated_peak_memory_range: - min: 12288 - max: 3209896 + min: 20480 + max: 5869248 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jglv30385 + job_id: jpedrnjv5 job_status: Passed torchscript_onnx_qnn: inference_time: 515.0 throughput: 1941.7475728155339 estimated_peak_memory_range: - min: 0 - max: 2323400 + min: 20480 + max: 2468336 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,22 +76,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j56yn3n0p + job_id: jprv47z7g job_status: Passed torchscript_onnx: - inference_time: 691.0 - throughput: 
1447.178002894356 + inference_time: 5453.0 + throughput: 183.38529249954155 estimated_peak_memory_range: - min: 65536 - max: 1790520 + min: 13029376 + max: 14799984 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 61 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: jp4lvxwv5 + layers_on_cpu: 14 + total_layers: 75 + job_id: jpv61n3r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -100,13 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:37:48Z' + timestamp: '2024-11-09T23:41:47Z' - torchscript_onnx_tflite: - inference_time: 1137.0 - throughput: 879.5074758135444 + inference_time: 893.0 + throughput: 1119.8208286674133 estimated_peak_memory_range: - min: 28672 - max: 1287784 + min: 20480 + max: 22402224 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j5we2nmm5 + job_id: jgz3x01x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 411.0 - throughput: 2433.0900243309 + inference_time: 358.0 + throughput: 2793.2960893854747 estimated_peak_memory_range: - min: 86016 - max: 1457688 + min: 65536 + max: 11817904 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +129,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp3je4elg + job_id: jp2k7z2qp + job_status: Passed + torchscript_onnx: + inference_time: 4455.0 + throughput: 224.4668911335578 + estimated_peak_memory_range: + min: 14069760 + max: 49349184 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 61 + layers_on_gpu: 0 + layers_on_cpu: 14 + total_layers: 75 + job_id: jgjv08xeg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-31T14:37:37Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T23:41:49Z' - torchscript_onnx_tflite: - inference_time: 887.0 - throughput: 1127.3957158962796 + inference_time: 1054.0 + throughput: 948.7666034155598 estimated_peak_memory_range: min: 12288 - max: 23277296 + max: 16054768 primary_compute_unit: NPU precision: int8 layer_info: @@ -152,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jg9lje98g + job_id: j5wedrjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 280.0 - throughput: 3571.4285714285716 + inference_time: 294.0 + throughput: 3401.360544217687 estimated_peak_memory_range: - min: 16384 - max: 8581264 + min: 61440 + max: 9881728 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,30 +182,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgo2313xp + job_id: jpy14y9lp job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:37:38Z' - - torchscript_onnx_qnn: - inference_time: 345.0 - throughput: 2898.550724637681 + torchscript_onnx: + inference_time: 4229.0 + throughput: 236.46252069047057 estimated_peak_memory_range: - min: 61440 - max: 10145760 + min: 14704640 + max: 35794784 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 17 + layers_on_npu: 61 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 17 - job_id: jpv6v1vj5 + layers_on_cpu: 14 + total_layers: 75 + job_id: jpedrn9v5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -199,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:37:39Z' + timestamp: '2024-11-09T23:41:52Z' - torchscript_onnx_tflite: - inference_time: 2217.0 - throughput: 451.05999097880016 + inference_time: 4399.0 + throughput: 227.32439190725165 estimated_peak_memory_range: - 
min: 12288 - max: 16218224 + min: 1617920 + max: 17062320 primary_compute_unit: NPU precision: int8 layer_info: @@ -213,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp14yxq7p + job_id: jg9l3q68g job_status: Passed torchscript_onnx_qnn: - inference_time: 1137.0 - throughput: 879.5074758135444 + inference_time: 1037.0 + throughput: 964.3201542912246 estimated_peak_memory_range: - min: 12288 - max: 7989408 + min: 65536 + max: 7956896 primary_compute_unit: NPU precision: int8 layer_info: @@ -228,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgjve0exg + job_id: jp0z1xnn5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -237,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:37:40Z' + timestamp: '2024-11-09T23:41:24Z' - torchscript_onnx_tflite: - inference_time: 12919.0 - throughput: 77.40537193281213 + inference_time: 12711.0 + throughput: 78.6720163637794 estimated_peak_memory_range: - min: 1662976 - max: 4489816 + min: 1765376 + max: 9515240 primary_compute_unit: NPU precision: int8 layer_info: @@ -251,7 +258,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgdxel7zp + job_id: jp14dmr7p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -260,13 +267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:32:30Z' + timestamp: '2024-11-09T23:40:55Z' - torchscript_onnx_tflite: - inference_time: 1127.0 - throughput: 887.3114463176574 + inference_time: 1114.0 + throughput: 897.6660682226212 estimated_peak_memory_range: - min: 815104 - max: 2207432 + min: 32768 + max: 1406552 primary_compute_unit: NPU precision: int8 layer_info: @@ -274,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j5we2nm45 + job_id: jgdxrmjzp job_status: Passed torchscript_onnx_qnn: - inference_time: 415.0 - throughput: 
2409.6385542168673 + inference_time: 412.0 + throughput: 2427.1844660194174 estimated_peak_memory_range: min: 81920 - max: 1799672 + max: 1354448 primary_compute_unit: NPU precision: int8 layer_info: @@ -289,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpedkrk15 + job_id: jp8q3klop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -298,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:37:41Z' + timestamp: '2024-11-09T23:41:26Z' - torchscript_onnx_tflite: - inference_time: 1193.0 - throughput: 838.2229673093043 + inference_time: 1111.0 + throughput: 900.0900090009001 estimated_peak_memory_range: - min: 16384 - max: 10459072 + min: 24576 + max: 10234104 primary_compute_unit: NPU precision: int8 layer_info: @@ -312,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpy1jey4p + job_id: j57yj8q95 job_status: Passed torchscript_onnx_qnn: - inference_time: 417.0 - throughput: 2398.0815347721823 + inference_time: 416.0 + throughput: 2403.846153846154 estimated_peak_memory_range: - min: 98304 - max: 1516768 + min: 73728 + max: 1303608 primary_compute_unit: NPU precision: int8 layer_info: @@ -327,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j5q6r8dmp + job_id: j5q67djop job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -336,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:21:40Z' + timestamp: '2024-11-09T23:41:32Z' - torchscript_onnx_tflite: - inference_time: 1115.0 - throughput: 896.8609865470852 + inference_time: 1150.0 + throughput: 869.5652173913044 estimated_peak_memory_range: - min: 28672 - max: 1502512 + min: 24576 + max: 3140984 primary_compute_unit: NPU precision: int8 layer_info: @@ -350,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp0z2yxe5 + job_id: jp4lx2z15 
job_status: Passed torchscript_onnx_qnn: inference_time: 412.0 throughput: 2427.1844660194174 estimated_peak_memory_range: - min: 77824 - max: 1343560 + min: 81920 + max: 2383768 primary_compute_unit: NPU precision: int8 layer_info: @@ -365,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jglv2nql5 + job_id: jglv0qnm5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -374,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:21:41Z' + timestamp: '2024-11-09T23:41:35Z' - torchscript_onnx_tflite: - inference_time: 1108.0 - throughput: 902.5270758122743 + inference_time: 1119.0 + throughput: 893.6550491510277 estimated_peak_memory_range: - min: 28672 - max: 1335096 + min: 32768 + max: 1416464 primary_compute_unit: NPU precision: int8 layer_info: @@ -388,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgkeqzkog + job_id: jpxk7zwl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 407.0 - throughput: 2457.002457002457 + inference_time: 414.0 + throughput: 2415.458937198068 estimated_peak_memory_range: min: 81920 - max: 1406392 + max: 1334616 primary_compute_unit: NPU precision: int8 layer_info: @@ -403,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgo2ny9dp + job_id: j56y306yp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -412,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:21:43Z' + timestamp: '2024-11-09T23:41:38Z' - torchscript_onnx_tflite: - inference_time: 1899.0 - throughput: 526.592943654555 + inference_time: 1918.0 + throughput: 521.376433785193 estimated_peak_memory_range: - min: 462848 - max: 14627728 + min: 1593344 + max: 15782992 primary_compute_unit: NPU precision: int8 layer_info: @@ -426,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp8qmok8p + job_id: 
j5mnwlj9p job_status: Passed torchscript_onnx_qnn: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 973.0 + throughput: 1027.749229188078 estimated_peak_memory_range: - min: 12288 - max: 5667088 + min: 16384 + max: 5869680 primary_compute_unit: NPU precision: int8 layer_info: @@ -441,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j56yz607p + job_id: jp3j4rkng job_status: Passed reference_device_info: name: SA8295P ADP @@ -450,13 +457,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:21:42Z' - - torchscript_onnx_qnn: - inference_time: 583.0 - throughput: 1715.2658662092624 + timestamp: '2024-11-09T23:41:40Z' + - torchscript_onnx_tflite: + inference_time: 2218.0 + throughput: 450.8566275924256 estimated_peak_memory_range: - min: 65536 - max: 13466208 + min: 16384 + max: 24145344 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 16 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 19 + job_id: jgn69wjq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 579.0 + throughput: 1727.1157167530225 + estimated_peak_memory_range: + min: 73728 + max: 12259152 primary_compute_unit: NPU precision: int8 layer_info: @@ -464,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j57yxjdl5 + job_id: jgo219ykp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -473,13 +495,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:37:47Z' + timestamp: '2024-11-09T23:41:44Z' - torchscript_onnx_qnn: - inference_time: 615.0 - throughput: 1626.0162601626016 + inference_time: 506.0 + throughput: 1976.2845849802372 estimated_peak_memory_range: - min: 237568 - max: 237568 + min: 69632 + max: 69632 primary_compute_unit: NPU precision: int8 layer_info: @@ -487,7 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgz3rxrk5 + 
job_id: jgkelkjng + job_status: Passed + torchscript_onnx: + inference_time: 5474.0 + throughput: 182.68176835951772 + estimated_peak_memory_range: + min: 15544320 + max: 15544320 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 61 + layers_on_gpu: 0 + layers_on_cpu: 14 + total_layers: 75 + job_id: jgz3x0ex5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -496,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:37:42Z' + timestamp: '2024-11-09T23:41:55Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/test.py b/qai_hub_models/models/quicksrnetmedium_quantized/test.py deleted file mode 100644 index 6f29e238..00000000 --- a/qai_hub_models/models/quicksrnetmedium_quantized/test.py +++ /dev/null @@ -1,69 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- - -import numpy as np -import pytest -import torch - -from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS -from qai_hub_models.models.quicksrnetmedium.model import MODEL_ASSET_VERSION, MODEL_ID -from qai_hub_models.models.quicksrnetmedium_quantized.demo import main as demo_main -from qai_hub_models.models.quicksrnetmedium_quantized.model import ( - QuickSRNetMediumQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo_output.png" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces 
randomness. Eliminate that for this test. - torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - model = QuickSRNetMediumQuantizable.from_pretrained() - app = SuperResolutionApp(model=model) - app_output_image = app.predict(image)[0] - - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@pytest.mark.trace -@skip_clone_repo_check -def test_trace(): - image = load_image(IMAGE_ADDRESS) - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - app = SuperResolutionApp( - QuickSRNetMediumQuantizable.from_pretrained().convert_to_torchscript() - ) - app_output_image = app.predict(image)[0] - - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) - - -if __name__ == "__main__": - test_task() diff --git a/qai_hub_models/models/quicksrnetsmall/README.md b/qai_hub_models/models/quicksrnetsmall/README.md index 17dc918e..ac8194ca 100644 --- a/qai_hub_models/models/quicksrnetsmall/README.md +++ b/qai_hub_models/models/quicksrnetsmall/README.md @@ -5,8 +5,7 @@ QuickSRNet Small is designed for upscaling images on mobile platforms to sharpen in real-time. -This is based on the implementation of QuickSRNetSmall found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetsmall). 
diff --git a/qai_hub_models/models/quicksrnetsmall/evaluate.py b/qai_hub_models/models/quicksrnetsmall/evaluate.py new file mode 100644 index 00000000..37c8745c --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetsmall import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index b17ccbf5..6bf73565 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -22,6 
+22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1301.0 - throughput: 768.6395080707148 + inference_time: 1344.0 + throughput: 744.047619047619 estimated_peak_memory_range: - min: 61440 - max: 20689208 + min: 28672 + max: 1429040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jgo2kq1kp + job_id: jp4lxyrq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1020.0 - throughput: 980.3921568627451 + inference_time: 1003.0 + throughput: 997.0089730807578 estimated_peak_memory_range: - min: 217088 - max: 71326440 + min: 12288 + max: 2448424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j57y03j95 + job_id: jgkel2eyg job_status: Passed torchscript_onnx: - inference_time: 1482.0 - throughput: 674.7638326585695 + inference_time: 1431.0 + throughput: 698.8120195667366 estimated_peak_memory_range: - min: 217088 - max: 65112088 + min: 212992 + max: 1540288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jgke7mlng + job_id: j5wedlez5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:32:06Z' + timestamp: '2024-11-09T22:20:46Z' - torchscript_onnx_tflite: - inference_time: 1307.0 - throughput: 765.1109410864575 + inference_time: 850.0 + throughput: 1176.4705882352941 estimated_peak_memory_range: - min: 20480 - max: 20978232 + min: 16384 + 
max: 20368704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jpv60x1r5 + job_id: jpxk7loj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 621.0 + throughput: 1610.3059581320451 estimated_peak_memory_range: - min: 16384 - max: 71087440 + min: 208896 + max: 10419152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp4lk0x15 + job_id: j5q67l67p job_status: Passed torchscript_onnx: - inference_time: 1434.0 - throughput: 697.350069735007 + inference_time: 952.0 + throughput: 1050.420168067227 estimated_peak_memory_range: - min: 12288 - max: 1365952 + min: 0 + max: 21979344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: j5q6eo7op + job_id: jg9l3zlqg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:32:07Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:20:47Z' - torchscript_onnx_tflite: - inference_time: 821.0 - throughput: 1218.026796589525 + inference_time: 915.0 + throughput: 1092.896174863388 estimated_peak_memory_range: - min: 20480 - max: 21532272 + min: 12288 + max: 14854560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jgjvz40eg + job_id: j5mnw0xyp job_status: Passed torchscript_onnx_qnn: - inference_time: 788.0 - throughput: 1269.0355329949239 + inference_time: 670.0 + throughput: 1492.5373134328358 estimated_peak_memory_range: - min: 208896 - max: 11754496 + min: 0 + max: 8697520 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpxkn27l5 + job_id: jglv0yve5 job_status: Passed torchscript_onnx: - inference_time: 1256.0 - throughput: 796.1783439490446 + inference_time: 798.0 + throughput: 1253.1328320802006 estimated_peak_memory_range: min: 0 - max: 21926704 + max: 14862016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jglv6rem5 + job_id: jp14dn4kp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:32:08Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:20:48Z' - torchscript_onnx_tflite: - inference_time: 744.0 - throughput: 1344.0860215053763 + inference_time: 1298.0 + throughput: 770.4160246533128 estimated_peak_memory_range: min: 12288 - max: 14673488 + max: 8730560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jgdxelrzp + job_id: jgn69z6v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 667.0 - throughput: 1499.2503748125937 + inference_time: 839.0 + throughput: 1191.8951132300358 estimated_peak_memory_range: - min: 0 - max: 8945104 + min: 229376 + max: 1795776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp8q0v3op - job_status: Passed - torchscript_onnx: - inference_time: 975.0 - throughput: 1025.6410256410256 - estimated_peak_memory_range: - min: 0 - max: 15067664 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 13 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jp3jv2qng + job_id: j56y38yvp job_status: 
Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:32:10Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:20:38Z' - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: - min: 28672 - max: 1388240 + min: 1421312 + max: 2721928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jpede3rv5 + job_id: jprv4lvvg job_status: Passed torchscript_onnx_qnn: inference_time: 861.0 throughput: 1161.4401858304298 estimated_peak_memory_range: - min: 20480 - max: 4070432 + min: 221184 + max: 1512496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5mnqyw9p + job_id: jgo21l24p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:31:58Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:20:41Z' - torchscript_onnx_tflite: - inference_time: 1320.0 - throughput: 757.5757575757576 + inference_time: 1297.0 + throughput: 771.0100231303007 estimated_peak_memory_range: - min: 28672 - max: 9362632 + min: 16384 + max: 8842664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jp14w01lp + job_id: jp2k7rkxp job_status: Passed torchscript_onnx_qnn: - inference_time: 882.0 - throughput: 1133.7868480725624 + inference_time: 868.0 + throughput: 1152.073732718894 estimated_peak_memory_range: - min: 225280 - max: 2021128 + 
min: 36864 + max: 4300584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp4ldq2l5 + job_id: jpv61l675 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:21:20Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:20:42Z' - torchscript_onnx_tflite: - inference_time: 1374.0 - throughput: 727.802037845706 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 24576 - max: 63789216 + min: 32768 + max: 1572296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jgdxqw4lp + job_id: jpy14o1rp job_status: Passed torchscript_onnx_qnn: - inference_time: 866.0 - throughput: 1154.7344110854503 + inference_time: 852.0 + throughput: 1173.7089201877934 estimated_peak_memory_range: - min: 237568 - max: 1922424 + min: 229376 + max: 1775160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpxk6vz95 + job_id: jgjv0rv7g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:21:21Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:20:43Z' - torchscript_onnx_tflite: - inference_time: 1320.0 - throughput: 757.5757575757576 + inference_time: 2066.0 + throughput: 484.027105517909 estimated_peak_memory_range: - min: 16384 - max: 8518216 + min: 6307840 + max: 19694752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j57ylz8r5 + job_id: jp0z1mz25 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 860.0 - throughput: 1162.7906976744187 + inference_time: 1795.0 + throughput: 557.1030640668523 estimated_peak_memory_range: - min: 53248 - max: 1506080 + min: 0 + max: 5949968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5mn6rlqp + job_id: jpedr7d75 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:21:22Z' + chipset: SA8295P + timestamp: '2024-11-09T22:20:44Z' - torchscript_onnx_tflite: - inference_time: 1904.0 - throughput: 525.2100840336135 + inference_time: 3414.0 + throughput: 292.91154071470413 estimated_peak_memory_range: - min: 16384 - max: 21341968 + min: 6307840 + max: 27861344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jp14yxd7p + job_id: jp8q3eqzp job_status: Passed torchscript_onnx_qnn: - inference_time: 1147.0 - throughput: 871.8395815170009 + inference_time: 1111.0 + throughput: 900.0900090009001 estimated_peak_memory_range: min: 208896 - max: 13307648 + max: 13180720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp0z371n5 + job_id: jgz3xl3z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:32:04Z' + timestamp: '2024-11-09T22:20:45Z' - torchscript_onnx_qnn: - inference_time: 972.0 - throughput: 1028.80658436214 + inference_time: 985.0 + throughput: 1015.2284263959391 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 
+442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgn6l89q5 + job_id: jp3j4zjxg job_status: Passed torchscript_onnx: - inference_time: 1439.0 - throughput: 694.9270326615705 + inference_time: 1445.0 + throughput: 692.0415224913495 estimated_peak_memory_range: - min: 8835072 - max: 8835072 + min: 8847360 + max: 8847360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: j56yelqyp + job_id: jgdxrdxkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:32:09Z' + timestamp: '2024-11-09T22:20:49Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/README.md b/qai_hub_models/models/quicksrnetsmall_quantized/README.md index 415b8d00..115e0737 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/README.md +++ b/qai_hub_models/models/quicksrnetsmall_quantized/README.md @@ -5,8 +5,7 @@ QuickSRNet Small is designed for upscaling images on mobile platforms to sharpen in real-time. -This is based on the implementation of QuickSRNetSmall-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/quicksrnetsmall_quantized). 
diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py b/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py index a8029b34..ab962fcb 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.quicksrnetsmall_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. @@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/evaluate.py b/qai_hub_models/models/quicksrnetsmall_quantized/evaluate.py new file mode 100644 index 00000000..f541301d --- /dev/null +++ b/qai_hub_models/models/quicksrnetsmall_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.quicksrnetsmall_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index f6b007cc..a1038e9c 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.quicksrnetsmall_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ 
can_access_qualcomm_ai_hub, export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. 
@@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "quicksrnetsmall_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "bsd300", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. 
Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. 
Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index a5e335df..2cbf30ca 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py @@ -4,76 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, - constrain_quantized_inputs_to_image_range, -) - -# isort: on -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetsmall.model import QuickSRNetSmall -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 4 - -DEFAULT_ENCODINGS = "quicksrnetsmall_quantized_encodings.json" - - -class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): - """QuickSRNetSmall with post train quantization support. - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. - Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, - quicksrnet_model: QuantizationSimModel, - scale_factor: int, - ) -> None: - QuickSRNetSmall.__init__(self, quicksrnet_model.model, scale_factor) - AIMETQuantizableMixin.__init__(self, quicksrnet_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - scale_factor: int = DEFAULT_SCALE_FACTOR, - ) -> QuickSRNetSmallQuantizable: - """ - Parameters: - aimet_encodings: - if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. - elif None: Doesn't load any encodings. Used when computing encodings. - else: Interprets as a filepath and loads the encodings stored there. 
- """ - # Load Model - fp16_model = QuickSRNetSmall.from_pretrained(scale_factor) - input_shape = cls.get_input_spec()["image"][0] - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - sim = QuantizationSimModel( - fp16_model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - return cls(sim, scale_factor) +class QuickSRNetSmallQuantizable(HubQuantizableMixin, QuickSRNetSmall): + pass diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index e3fa6bb7..31fd4c62 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1088.0 - throughput: 919.1176470588235 + inference_time: 1074.0 + throughput: 931.0986964618249 estimated_peak_memory_range: min: 24576 - max: 3711104 + max: 3053712 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp2k0nlqp + job_id: jpv61n8k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 464.0 - throughput: 2155.1724137931033 + inference_time: 502.0 + throughput: 1992.03187250996 estimated_peak_memory_range: - min: 24576 - max: 10744032 + min: 73728 + max: 10896624 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,75 +76,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpede3vv5 + job_id: jgdxrmkzp job_status: Passed 
torchscript_onnx: - inference_time: 675.0 - throughput: 1481.4814814814815 - estimated_peak_memory_range: - min: 65536 - max: 1429728 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 13 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jp2k0nmqp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:31:25Z' - - torchscript_onnx_tflite: - inference_time: 1075.0 - throughput: 930.2325581395348 + inference_time: 3581.0 + throughput: 279.2516056967328 estimated_peak_memory_range: min: 12288 - max: 10120624 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 10 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 13 - job_id: jpy1r06lp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 469.0 - throughput: 2132.1961620469083 - estimated_peak_memory_range: - min: 69632 - max: 2252872 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 11 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 11 - job_id: jgz3ok7x5 - job_status: Passed - torchscript_onnx: - inference_time: 662.0 - throughput: 1510.5740181268882 - estimated_peak_memory_range: - min: 69632 - max: 1468528 + max: 1947424 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 37 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jpy1r0dlp + layers_on_cpu: 8 + total_layers: 45 + job_id: jgkelk1ng job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:31:26Z' + timestamp: '2024-11-09T23:40:08Z' - torchscript_onnx_tflite: - inference_time: 901.0 - throughput: 1109.8779134295228 + inference_time: 865.0 + throughput: 1156.0693641618498 estimated_peak_memory_range: - min: 16384 
- max: 21490416 + min: 20480 + max: 21851152 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp0z37ln5 + job_id: jgjv089vg job_status: Passed torchscript_onnx_qnn: - inference_time: 321.0 - throughput: 3115.264797507788 + inference_time: 325.0 + throughput: 3076.923076923077 estimated_peak_memory_range: min: 65536 - max: 10636656 + max: 10564800 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +129,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5we2n9m5 + job_id: j57yj8m95 job_status: Passed torchscript_onnx: - inference_time: 633.0 - throughput: 1579.778830963665 + inference_time: 2715.0 + throughput: 368.3241252302026 estimated_peak_memory_range: min: 0 - max: 21820608 + max: 30352752 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 37 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jp0z37rn5 + layers_on_cpu: 8 + total_layers: 45 + job_id: j5q67dnop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +153,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:31:27Z' + timestamp: '2024-11-09T23:40:11Z' - torchscript_onnx_tflite: - inference_time: 1140.0 - throughput: 877.1929824561404 + inference_time: 828.0 + throughput: 1207.729468599034 estimated_peak_memory_range: - min: 12288 - max: 14642368 + min: 8192 + max: 14291584 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jpv60xwr5 + job_id: jpedrnqo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 344.0 - throughput: 2906.9767441860463 + inference_time: 282.0 + throughput: 3546.099290780142 estimated_peak_memory_range: min: 61440 - max: 9342080 + max: 9396000 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +182,22 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgn6l8eq5 + job_id: jp4lx2715 job_status: Passed torchscript_onnx: - inference_time: 440.0 - throughput: 2272.7272727272725 + inference_time: 2618.0 + throughput: 381.9709702062643 estimated_peak_memory_range: - min: 0 - max: 14222992 + min: 8192 + max: 18623664 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 37 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jgke7myng + layers_on_cpu: 8 + total_layers: 45 + job_id: jglv0qjm5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:31:29Z' + timestamp: '2024-11-09T23:40:14Z' - torchscript_onnx_tflite: - inference_time: 2862.0 - throughput: 349.4060097833683 + inference_time: 2109.0 + throughput: 474.158368895211 estimated_peak_memory_range: min: 12288 - max: 15665568 + max: 15287520 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp8q0vzop + job_id: jgz3x06o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 957.0 - throughput: 1044.932079414838 + inference_time: 1096.0 + throughput: 912.4087591240876 estimated_peak_memory_range: - min: 86016 - max: 7821392 + min: 90112 + max: 8355600 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jg9lje48g + job_id: jpxk7zql5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:31:15Z' + timestamp: '2024-11-09T23:39:45Z' - torchscript_onnx_tflite: - inference_time: 10715.0 - throughput: 93.32711152589827 + inference_time: 11377.0 + throughput: 87.8966335589347 
estimated_peak_memory_range: - min: 1818624 - max: 12286016 + min: 1798144 + max: 5739400 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +258,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jgke7m3ng + job_id: j5wedrk35 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:31:03Z' + timestamp: '2024-11-09T23:39:18Z' - torchscript_onnx_tflite: - inference_time: 1092.0 - throughput: 915.7509157509157 + inference_time: 1058.0 + throughput: 945.179584120983 estimated_peak_memory_range: - min: 16384 - max: 3081976 + min: 20480 + max: 1438520 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j5q6eo3op + job_id: jg9l3qrwg job_status: Passed torchscript_onnx_qnn: - inference_time: 389.0 - throughput: 2570.694087403599 + inference_time: 392.0 + throughput: 2551.0204081632655 estimated_peak_memory_range: min: 73728 - max: 1380392 + max: 1370280 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp14yx87p + job_id: j5mnwl79p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:31:16Z' + timestamp: '2024-11-09T23:39:48Z' - torchscript_onnx_tflite: - inference_time: 1070.0 - throughput: 934.5794392523364 + inference_time: 1064.0 + throughput: 939.8496240601504 estimated_peak_memory_range: - min: 32768 - max: 1342440 + min: 28672 + max: 1341520 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j5q6r81mp + job_id: jp14dm98p job_status: Passed torchscript_onnx_qnn: - inference_time: 392.0 - throughput: 
2551.0204081632655 + inference_time: 388.0 + throughput: 2577.319587628866 estimated_peak_memory_range: - min: 73728 - max: 1594248 + min: 45056 + max: 1434880 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgo2nywdp + job_id: jprv47r7g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:21:01Z' + timestamp: '2024-11-09T23:39:54Z' - torchscript_onnx_tflite: - inference_time: 1077.0 - throughput: 928.5051067780872 + inference_time: 1082.0 + throughput: 924.2144177449168 estimated_peak_memory_range: - min: 24576 - max: 1338864 + min: 28672 + max: 2965752 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jglv2n8l5 + job_id: jgdxrmkrp job_status: Passed torchscript_onnx_qnn: - inference_time: 389.0 - throughput: 2570.694087403599 + inference_time: 391.0 + throughput: 2557.544757033248 estimated_peak_memory_range: min: 81920 - max: 1365944 + max: 1609040 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpv6r3mm5 + job_id: jp2k7z1qp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:21:02Z' + timestamp: '2024-11-09T23:39:57Z' - torchscript_onnx_tflite: - inference_time: 1076.0 - throughput: 929.368029739777 + inference_time: 2623.0 + throughput: 381.2428516965307 estimated_peak_memory_range: - min: 28672 - max: 1401232 + min: 815104 + max: 33430056 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp3j1k7zg + job_id: j5wedrkm5 job_status: 
Passed torchscript_onnx_qnn: - inference_time: 398.0 - throughput: 2512.5628140703516 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: - min: 20480 - max: 1813664 + min: 73728 + max: 1289296 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpedw9x05 + job_id: jpy14yllp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:21:04Z' + timestamp: '2024-11-09T23:40:00Z' - torchscript_onnx_tflite: - inference_time: 1859.0 - throughput: 537.9236148466917 + inference_time: 1802.0 + throughput: 554.9389567147614 estimated_peak_memory_range: - min: 12288 - max: 13149440 + min: 1589248 + max: 14853232 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j56yz6m7p + job_id: jg9l3qr8g job_status: Passed torchscript_onnx_qnn: - inference_time: 876.0 - throughput: 1141.552511415525 + inference_time: 896.0 + throughput: 1116.0714285714287 estimated_peak_memory_range: - min: 20480 - max: 5686192 + min: 16384 + max: 5702912 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgjv2xy8g + job_id: jp0z1xwn5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +457,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:21:03Z' + timestamp: '2024-11-09T23:40:03Z' - torchscript_onnx_tflite: - inference_time: 1487.0 - throughput: 672.4949562878278 + inference_time: 1909.0 + throughput: 523.8344683080147 estimated_peak_memory_range: - min: 12288 - max: 22544800 + min: 1593344 + max: 23888048 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +471,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 3 total_layers: 13 - job_id: jgo2kqvkp + job_id: jp14dm97p job_status: Passed torchscript_onnx_qnn: - inference_time: 545.0 - throughput: 1834.8623853211009 + inference_time: 537.0 + throughput: 1862.1973929236499 estimated_peak_memory_range: min: 65536 - max: 11714624 + max: 11867392 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5mnqyz9p + job_id: jp8q3knop job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +495,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:31:22Z' + timestamp: '2024-11-09T23:40:05Z' - torchscript_onnx_qnn: - inference_time: 509.0 - throughput: 1964.6365422396857 + inference_time: 487.0 + throughput: 2053.388090349076 estimated_peak_memory_range: - min: 229376 - max: 229376 + min: 184320 + max: 184320 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgdxelvzp + job_id: jgn69w4q5 job_status: Passed torchscript_onnx: - inference_time: 737.0 - throughput: 1356.85210312076 + inference_time: 3597.0 + throughput: 278.00945232137894 estimated_peak_memory_range: - min: 3350528 - max: 3350528 + min: 15511552 + max: 15511552 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 13 + layers_on_npu: 37 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 13 - job_id: jp8q0v7op + layers_on_cpu: 8 + total_layers: 45 + job_id: j56y30kyp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:31:28Z' + timestamp: '2024-11-09T23:40:17Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/test.py b/qai_hub_models/models/quicksrnetsmall_quantized/test.py deleted file mode 100644 index 08fe4cf3..00000000 --- 
a/qai_hub_models/models/quicksrnetsmall_quantized/test.py +++ /dev/null @@ -1,65 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- - -import numpy as np -import pytest -import torch - -from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS -from qai_hub_models.models.quicksrnetsmall.model import MODEL_ASSET_VERSION, MODEL_ID -from qai_hub_models.models.quicksrnetsmall_quantized.demo import main as demo_main -from qai_hub_models.models.quicksrnetsmall_quantized.model import ( - QuickSRNetSmallQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo_output.png" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. 
- torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - model = QuickSRNetSmallQuantizable.from_pretrained() - app = SuperResolutionApp(model=model) - app_output_image = app.predict(image)[0] - - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@pytest.mark.trace -@skip_clone_repo_check -def test_trace(): - image = load_image(IMAGE_ADDRESS) - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - app = SuperResolutionApp( - QuickSRNetSmallQuantizable.from_pretrained().convert_to_torchscript() - ) - app_output_image = app.predict(image)[0] - - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/qwen2_7b_instruct_quantized/README.md b/qai_hub_models/models/qwen2_7b_instruct_quantized/README.md index 83dc0e06..e2e00104 100644 --- a/qai_hub_models/models/qwen2_7b_instruct_quantized/README.md +++ b/qai_hub_models/models/qwen2_7b_instruct_quantized/README.md @@ -5,8 +5,7 @@ The Qwen2-7B-Instruct is a state-of-the-art multilingual language model with 7.07 billion parameters, excelling in language understanding, generation, coding, and mathematics. AI Hub provides with four QNN context binaries (shared weights) that can be deployed on Snapdragon 8 Elite with Genie SDK. -This is based on the implementation of Qwen2-7B-Instruct found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/qwen2_7b_instruct_quantized). diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/README.md b/qai_hub_models/models/real_esrgan_general_x4v3/README.md index 34138a03..b9dabe38 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/README.md +++ b/qai_hub_models/models/real_esrgan_general_x4v3/README.md @@ -5,8 +5,7 @@ Real-ESRGAN is a machine learning model that upscales an image with minimal loss in quality. -This is based on the implementation of Real-ESRGAN-General-x4v3 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/real_esrgan_general_x4v3). diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 1e0574dc..7df34327 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7476.0 - throughput: 133.7613697164259 + inference_time: 7189.0 + throughput: 139.10140492418972 estimated_peak_memory_range: - min: 9469952 - max: 12536248 + min: 9486336 + max: 12506064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jpxkn26l5 + job_id: j5wed7nz5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 6284.0 - throughput: 159.13430935709738 + inference_time: 6279.0 + throughput: 159.26102882624622 estimated_peak_memory_range: min: 16384 - max: 11496384 + max: 112593320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j5q6eorop + job_id: jprv4l3vg job_status: Passed torchscript_onnx: - inference_time: 6775.0 - throughput: 147.60147601476015 + inference_time: 6790.0 + throughput: 147.27540500736376 estimated_peak_memory_range: - min: 10240000 - max: 11607920 + min: 7639040 + max: 21859816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jg9ljew8g + job_id: jpv61lk75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:30:39Z' + timestamp: '2024-11-09T22:20:06Z' - torchscript_onnx_tflite: - inference_time: 7314.0 - throughput: 136.7240907847963 + inference_time: 5387.0 + throughput: 185.63207722294413 estimated_peak_memory_range: - min: 11464704 - max: 25075104 + min: 9482240 + max: 75086560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j5mnqy69p + job_id: jg9l3meqg job_status: Passed torchscript_onnx_qnn: - inference_time: 6299.0 - throughput: 158.75535799333227 + inference_time: 4592.0 + throughput: 217.77003484320556 estimated_peak_memory_range: - min: 12288 - max: 11660672 + min: 0 + max: 19570768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jglv6r3m5 + job_id: jp2k7ryxp job_status: Passed torchscript_onnx: - inference_time: 6797.0 - throughput: 147.12373105781964 + inference_time: 4986.0 + 
throughput: 200.56157240272765 estimated_peak_memory_range: - min: 6725632 - max: 8125416 + min: 6529024 + max: 79447840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jp14yxe7p + job_id: jgjv0rn7g job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:30:40Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:20:07Z' - torchscript_onnx_tflite: - inference_time: 6017.0 - throughput: 166.19577862722286 + inference_time: 4149.0 + throughput: 241.02193299590263 estimated_peak_memory_range: - min: 9457664 - max: 74163456 + min: 12288 + max: 30310912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgn6l8mq5 + job_id: jp14djxkp job_status: Passed torchscript_onnx_qnn: - inference_time: 5200.0 - throughput: 192.30769230769232 + inference_time: 4325.0 + throughput: 231.21387283236993 estimated_peak_memory_range: - min: 208896 - max: 19358272 + min: 0 + max: 18354576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j56yelnyp + job_id: jpy14o3rp job_status: Passed torchscript_onnx: - inference_time: 5656.0 - throughput: 176.8033946251768 + inference_time: 4615.0 + throughput: 216.68472372697724 estimated_peak_memory_range: - min: 6512640 - max: 78444496 + min: 7176192 + max: 35824768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jgdxelozp + job_id: jpedr7m75 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: 
Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:30:41Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:20:08Z' - torchscript_onnx_tflite: - inference_time: 5101.0 - throughput: 196.0399921584003 + inference_time: 7275.0 + throughput: 137.4570446735395 estimated_peak_memory_range: - min: 9453568 - max: 39331728 + min: 5844992 + max: 13840544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgke7mqng + job_id: jgdxr3lkp job_status: Passed torchscript_onnx_qnn: - inference_time: 4314.0 - throughput: 231.80343069077423 + inference_time: 5731.0 + throughput: 174.48961786773688 estimated_peak_memory_range: - min: 208896 - max: 19002352 + min: 262144 + max: 1408184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j5we2nqm5 - job_status: Passed - torchscript_onnx: - inference_time: 4622.0 - throughput: 216.3565556036348 - estimated_peak_memory_range: - min: 7905280 - max: 36803392 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jp4lk0v15 + job_id: jp0z1m025 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:30:43Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:19:58Z' - torchscript_onnx_tflite: - inference_time: 7266.0 - throughput: 137.62730525736305 + inference_time: 7495.0 + throughput: 133.422281521014 estimated_peak_memory_range: - min: 9482240 - max: 10766560 + min: 9535488 + max: 13448136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 3 total_layers: 72 - job_id: jprv8j27g + job_id: j57yj43q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5754.0 - throughput: 173.79214459506431 + inference_time: 5765.0 + throughput: 173.46053772766695 estimated_peak_memory_range: - min: 270336 - max: 1540544 + min: 53248 + max: 4118312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jp3jv2eng + job_id: jgkel2xyg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:30:31Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:20:00Z' - torchscript_onnx_tflite: - inference_time: 7447.0 - throughput: 134.28226131328051 + inference_time: 7340.0 + throughput: 136.23978201634878 estimated_peak_memory_range: - min: 9457664 - max: 21595480 + min: 9474048 + max: 129256568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jpxk6vr95 + job_id: jp4lx10q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5791.0 - throughput: 172.68174753928508 + inference_time: 5776.0 + throughput: 173.13019390581718 estimated_peak_memory_range: - min: 221184 - max: 1887320 + min: 253952 + max: 1631384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jprv2kdeg + job_id: j5q67lq7p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:20:42Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:20:01Z' - torchscript_onnx_tflite: - inference_time: 7268.0 - throughput: 137.5894331315355 + 
inference_time: 7297.0 + throughput: 137.04262025489928 estimated_peak_memory_range: - min: 9486336 - max: 13034552 + min: 9478144 + max: 132900480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j5mn6rkqp + job_id: jpxk742j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5764.0 - throughput: 173.49063150589868 + inference_time: 5777.0 + throughput: 173.10022503029253 estimated_peak_memory_range: - min: 303104 - max: 1574288 + min: 229376 + max: 1565984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jp2k98dmp + job_id: jglv0yme5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:20:43Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:20:02Z' - torchscript_onnx_tflite: - inference_time: 7496.0 - throughput: 133.40448239060834 + inference_time: 13044.0 + throughput: 76.66360012266176 estimated_peak_memory_range: - min: 9478144 - max: 13623984 + min: 9482240 + max: 41201440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgn6m2qm5 + job_id: j5mnwmyyp job_status: Passed torchscript_onnx_qnn: - inference_time: 5784.0 - throughput: 172.89073305670817 + inference_time: 10256.0 + throughput: 97.50390015600624 estimated_peak_memory_range: - min: 266240 - max: 1407608 + min: 258048 + max: 6222464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jpy1je24p + job_id: j56y384vp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android 
manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:20:44Z' + chipset: SA8295P + timestamp: '2024-11-09T22:20:04Z' - torchscript_onnx_tflite: - inference_time: 11124.0 - throughput: 89.89572096368212 + inference_time: 12381.0 + throughput: 80.768920119538 estimated_peak_memory_range: - min: 9572352 - max: 79576384 + min: 9465856 + max: 79149888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jp8q0vmop + job_id: jgn69zvv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 9587.0 - throughput: 104.30791697089809 + inference_time: 9701.0 + throughput: 103.08215647871353 estimated_peak_memory_range: - min: 208896 - max: 25636752 + min: 237568 + max: 24945056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jgz3okrx5 + job_id: jgo21l64p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:30:37Z' + timestamp: '2024-11-09T22:20:05Z' - torchscript_onnx_qnn: - inference_time: 6198.0 - throughput: 161.34236850596966 + inference_time: 6201.0 + throughput: 161.26431220770843 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 237568 + max: 237568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jgo2kq3kp + job_id: jp8q3eyzp job_status: Passed torchscript_onnx: - inference_time: 7128.0 - throughput: 140.29180695847361 + inference_time: 7125.0 + throughput: 140.35087719298247 estimated_peak_memory_range: - min: 9015296 - max: 9015296 + min: 8863744 + max: 8863744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: j57y03x95 + job_id: 
jgz3xldz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:30:42Z' + timestamp: '2024-11-09T22:20:09Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/README.md b/qai_hub_models/models/real_esrgan_x4plus/README.md index f3544722..99838be1 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/README.md +++ b/qai_hub_models/models/real_esrgan_x4plus/README.md @@ -5,8 +5,7 @@ Real-ESRGAN is a machine learning model that upscales an image with minimal loss in quality. The implementation is a derivative of the Real-ESRGAN-x4plus architecture, a larger and more powerful version compared to the Real-ESRGAN-general-x4v3 architecture. -This is based on the implementation of Real-ESRGAN-x4plus found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/real_esrgan_x4plus). 
diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index 7ac37fa1..1379952a 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 67048.0 - throughput: 14.91468798472736 + inference_time: 65797.0 + throughput: 15.198261318905118 estimated_peak_memory_range: - min: 3194880 - max: 6448576 + min: 3256320 + max: 5770304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jpxkn2n35 + job_id: j5wed7v45 job_status: Passed torchscript_onnx_qnn: - inference_time: 67461.0 - throughput: 14.823379434043373 + inference_time: 68484.0 + throughput: 14.601950820629636 estimated_peak_memory_range: - min: 61440 - max: 221824744 + min: 159744 + max: 221512552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j5q6eoenp + job_id: jprv40qkg job_status: Passed torchscript_onnx: - inference_time: 72849.0 - throughput: 13.727024392922345 + inference_time: 68508.0 + throughput: 14.596835406083962 estimated_peak_memory_range: - min: 122880 - max: 44393216 + min: 106496 + max: 44047688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jg9ljeywg + job_id: jgo21rq1p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:29:57Z' + timestamp: '2024-11-09T22:19:23Z' - torchscript_onnx_tflite: - inference_time: 66391.0 - throughput: 15.062282538295854 + inference_time: 50593.0 + throughput: 19.76558021860732 estimated_peak_memory_range: - min: 3264512 - max: 6157968 + min: 3248128 + max: 700116224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j5mnqyqdp + job_id: jg9l3m1mg job_status: Passed torchscript_onnx_qnn: - inference_time: 72421.0 - throughput: 13.808149569876141 + inference_time: 50424.0 + throughput: 19.831826114548626 estimated_peak_memory_range: - min: 126976 - max: 39907136 + min: 65536 + max: 112512032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jglv6r6j5 + job_id: jp2k7w66p job_status: Passed torchscript_onnx: - inference_time: 68447.0 - throughput: 14.609844112963314 + inference_time: 53369.0 + throughput: 18.73746931739399 estimated_peak_memory_range: - min: 110592 - max: 44439304 + min: 8867840 + max: 731811632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jp14yxw8p + job_id: jpv61dxz5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:29:58Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:19:24Z' - torchscript_onnx_tflite: - inference_time: 55516.0 - throughput: 18.012825131493624 + inference_time: 38259.0 + throughput: 26.1376408165399 estimated_peak_memory_range: - min: 3272704 - max: 695820352 + min: 3174400 + max: 193525120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 1028 - job_id: jgn6l8lk5 + job_id: jp14djlnp job_status: Passed torchscript_onnx_qnn: - inference_time: 56624.0 - throughput: 17.66035603277762 + inference_time: 42951.0 + throughput: 23.282344997788176 estimated_peak_memory_range: - min: 86016 - max: 112977040 + min: 8192 + max: 135740560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j56yele6p + job_id: jpy14xw0p job_status: Passed torchscript_onnx: - inference_time: 62243.0 - throughput: 16.066063653744195 + inference_time: 38399.0 + throughput: 26.04234485273054 estimated_peak_memory_range: - min: 7995392 - max: 730687760 + min: 0 + max: 184988768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jgdxelqrp + job_id: jgjv0741g job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:29:59Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:19:25Z' - torchscript_onnx_tflite: - inference_time: 42930.0 - throughput: 23.293733985557886 + inference_time: 70609.0 + throughput: 14.16250053109377 estimated_peak_memory_range: - min: 3170304 - max: 192975856 + min: 3211264 + max: 5570728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgke7m7wg + job_id: jgdxr396p job_status: Passed torchscript_onnx_qnn: - inference_time: 38261.0 - throughput: 26.13627453542772 + inference_time: 63533.0 + throughput: 15.739851730596698 estimated_peak_memory_range: - min: 0 - max: 139018944 + min: 389120 + max: 1493968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j5we2n335 - job_status: Passed - torchscript_onnx: - inference_time: 42973.0 - throughput: 23.27042561608452 - estimated_peak_memory_range: - min: 0 - max: 185735328 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1030 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1030 - job_id: jg9ljey8g + job_id: jp0z1j705 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:30:02Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:19:15Z' - torchscript_onnx_tflite: - inference_time: 64186.0 - throughput: 15.57972143458075 + inference_time: 66455.0 + throughput: 15.047776690993906 estimated_peak_memory_range: - min: 1400832 - max: 4554872 + min: 3264512 + max: 6156656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jprv8j80g + job_id: j57yj4wn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 64849.0 - throughput: 15.420438248855033 + inference_time: 63279.0 + throughput: 15.803031021349895 estimated_peak_memory_range: - min: 356352 - max: 1582736 + min: 417792 + max: 1817056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jp3jv2v3g + job_id: jgkel4mvg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:29:49Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:19:18Z' - torchscript_onnx_tflite: - inference_time: 64796.0 - throughput: 15.433051422927342 + inference_time: 62482.0 + 
throughput: 16.004609327486317 estimated_peak_memory_range: - min: 2809856 - max: 5498624 + min: 3264512 + max: 5782968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jpv6r32m5 + job_id: jp4lx1o25 job_status: Passed torchscript_onnx_qnn: - inference_time: 65008.0 - throughput: 15.382722126507506 + inference_time: 63149.0 + throughput: 15.835563508527452 estimated_peak_memory_range: - min: 462848 - max: 1770624 + min: 397312 + max: 1707936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jgz3jez65 + job_id: j5q67yoep job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:20:22Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:19:19Z' - torchscript_onnx_tflite: - inference_time: 69309.0 - throughput: 14.428140645515013 + inference_time: 67708.0 + throughput: 14.769303479647899 estimated_peak_memory_range: - min: 3268608 - max: 6084744 + min: 3248128 + max: 5942520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgjv2x38g + job_id: jpxk74j85 job_status: Passed torchscript_onnx_qnn: - inference_time: 62733.0 - throughput: 15.940573541836036 + inference_time: 63990.0 + throughput: 15.62744178777934 estimated_peak_memory_range: - min: 385024 - max: 1889712 + min: 409600 + max: 1784928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j5we3oyj5 + job_id: jglv0xr25 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: 
SA8775P Proxy - timestamp: '2024-10-30T01:20:24Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:19:20Z' - torchscript_onnx_tflite: - inference_time: 63971.0 - throughput: 15.632083287739757 + inference_time: 113129.0 + throughput: 8.839466449805089 estimated_peak_memory_range: - min: 3256320 - max: 6605952 + min: 3293184 + max: 179105168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jpedw9605 + job_id: j5mnwm27p job_status: Passed torchscript_onnx_qnn: - inference_time: 65285.0 - throughput: 15.31745423910546 + inference_time: 113108.0 + throughput: 8.841107613961878 estimated_peak_memory_range: - min: 364544 - max: 2057272 + min: 409600 + max: 6422176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jg9lyvovg + job_id: j56y37lnp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:20:25Z' + chipset: SA8295P + timestamp: '2024-11-09T22:19:21Z' - torchscript_onnx_tflite: - inference_time: 150742.0 - throughput: 6.633851215984928 + inference_time: 131190.0 + throughput: 7.622532205198567 estimated_peak_memory_range: - min: 3452928 - max: 650551360 + min: 3461120 + max: 652256240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jp8q0v0kp + job_id: jgn69nyj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 124614.0 - throughput: 8.024780522252716 + inference_time: 134987.0 + throughput: 7.408120782001229 estimated_peak_memory_range: - min: 385024 - max: 90352864 + min: 339968 + max: 90547488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 1029 - job_id: jgz3okjo5 + job_id: jp3j492mg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:29:55Z' + timestamp: '2024-11-09T22:19:22Z' - torchscript_onnx_qnn: - inference_time: 65183.0 - throughput: 15.341423377260941 + inference_time: 65122.0 + throughput: 15.355793740978472 estimated_peak_memory_range: min: 204800 max: 204800 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jgo2kqkqp + job_id: jp8q3xvqp job_status: Passed torchscript_onnx: inference_time: 65670.0 throughput: 15.227653418608192 estimated_peak_memory_range: - min: 39747584 - max: 39747584 + min: 39718912 + max: 39718912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j5we2n3m5 + job_id: jpedrz385 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:30:00Z' + timestamp: '2024-11-09T22:19:26Z' diff --git a/qai_hub_models/models/regnet/README.md b/qai_hub_models/models/regnet/README.md index e9368f1b..54c2c7f1 100644 --- a/qai_hub_models/models/regnet/README.md +++ b/qai_hub_models/models/regnet/README.md @@ -5,8 +5,7 @@ RegNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of RegNet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/regnet). diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 365e7182..fd6895e4 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 2065.0 - throughput: 484.26150121065376 + inference_time: 2057.0 + throughput: 486.1448711716091 estimated_peak_memory_range: - min: 12288 - max: 2125344 + min: 32768 + max: 2057576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp14yx28p + job_id: jpv61d3z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2170.0 - throughput: 460.8294930875576 + inference_time: 2138.0 + throughput: 467.7268475210477 estimated_peak_memory_range: - min: 360448 - max: 62443752 + min: 614400 + max: 62348448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,67 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jpy1r0z8p + job_id: jp4lx1q25 job_status: Passed torchscript_onnx: - inference_time: 2188.0 - throughput: 457.0383912248629 - estimated_peak_memory_range: - min: 16384 - max: 43785208 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 190 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 190 - job_id: jgjvz4zvg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-26T23:29:00Z' - - torchscript_onnx_tflite: - inference_time: 2076.0 - throughput: 481.6955684007707 + inference_time: 2164.0 + throughput: 462.1072088724584 estimated_peak_memory_range: min: 16384 - max: 67927648 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 114 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 114 - job_id: jgdxelnrp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2148.0 - throughput: 465.54934823091247 - estimated_peak_memory_range: - min: 20480 - max: 62386224 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 188 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 188 - job_id: jp0z37495 - job_status: Passed - torchscript_onnx: - inference_time: 2189.0 - throughput: 456.82960255824577 - estimated_peak_memory_range: - min: 618496 - max: 393329952 + max: 42965304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jpede3eo5 + job_id: j5q67ykep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:29:01Z' + timestamp: '2024-11-09T22:18:24Z' - torchscript_onnx_tflite: - inference_time: 1711.0 - throughput: 584.4535359438925 + inference_time: 1394.0 + throughput: 717.3601147776184 estimated_peak_memory_range: min: 16384 - max: 149828720 + max: 151116096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: j57y032v5 + job_id: jgjv07x1g job_status: Passed torchscript_onnx_qnn: - inference_time: 1657.0 - throughput: 603.5003017501509 + inference_time: 1485.0 + throughput: 673.4006734006734 estimated_peak_memory_range: - min: 618496 - max: 31525008 + min: 0 + max: 31856656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jp8q0v2kp + job_id: jpxk74v85 job_status: Passed torchscript_onnx: - inference_time: 1761.0 - throughput: 567.8591709256104 + inference_time: 1574.0 + throughput: 635.3240152477764 estimated_peak_memory_range: min: 0 - max: 153934144 + max: 153454672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jgz3okoo5 + job_id: jglv0xz25 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:29:02Z' + timestamp: '2024-11-09T22:18:25Z' - torchscript_onnx_tflite: - inference_time: 1385.0 - throughput: 722.0216606498195 + inference_time: 1383.0 + throughput: 723.0657989877079 estimated_peak_memory_range: min: 12288 - max: 75605488 + max: 74412160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp2k0nxrp + job_id: jpedrz985 job_status: Passed torchscript_onnx_qnn: - inference_time: 1236.0 - throughput: 809.0614886731391 + inference_time: 1425.0 + throughput: 701.7543859649123 estimated_peak_memory_range: min: 0 - max: 29746224 + max: 29526304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jpv60x0k5 + job_id: j5mnwmr7p job_status: Passed torchscript_onnx: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1334.0 + throughput: 749.6251874062968 estimated_peak_memory_range: min: 0 - max: 77894144 + max: 77426304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jg9ljejwg + job_id: j56y37jnp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 
+200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:29:04Z' + timestamp: '2024-11-09T22:18:26Z' - torchscript_onnx_tflite: inference_time: 2023.0 throughput: 494.3153732081068 estimated_peak_memory_range: - min: 20480 - max: 12663648 + min: 16384 + max: 6813448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp4lk0n85 + job_id: jgz3xme45 job_status: Passed torchscript_onnx_qnn: - inference_time: 2014.0 - throughput: 496.52432969215494 + inference_time: 2036.0 + throughput: 491.1591355599214 estimated_peak_memory_range: - min: 638976 - max: 1792736 + min: 655360 + max: 1973624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jgke7mvwg + job_id: jgn69n2j5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:28:51Z' + timestamp: '2024-11-09T22:18:17Z' - torchscript_onnx_tflite: - inference_time: 2037.0 - throughput: 490.9180166912126 + inference_time: 2033.0 + throughput: 491.88391539596654 estimated_peak_memory_range: - min: 155648 - max: 2486256 + min: 24576 + max: 2125120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp2k98omp + job_id: j5wed7o45 job_status: Passed torchscript_onnx_qnn: - inference_time: 2058.0 - throughput: 485.90864917395527 + inference_time: 2045.0 + throughput: 488.99755501222495 estimated_peak_memory_range: min: 634880 - max: 2250624 + max: 1881040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jp8qmoj8p + job_id: jp2k7w86p job_status: Passed reference_device_info: 
name: SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:19:47Z' + timestamp: '2024-11-09T22:18:19Z' - torchscript_onnx_tflite: - inference_time: 2049.0 - throughput: 488.0429477794046 + inference_time: 2059.0 + throughput: 485.67265662943174 estimated_peak_memory_range: min: 28672 - max: 1571104 + max: 2243880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jpy1je84p + job_id: jg9l3mvmg job_status: Passed torchscript_onnx_qnn: - inference_time: 2032.0 - throughput: 492.12598425196853 + inference_time: 2016.0 + throughput: 496.031746031746 estimated_peak_memory_range: - min: 663552 - max: 1760376 + min: 638976 + max: 1978384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jgkeqz6og + job_id: jpy14xe0p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:19:48Z' + timestamp: '2024-11-09T22:18:20Z' - torchscript_onnx_tflite: - inference_time: 2057.0 - throughput: 486.1448711716091 + inference_time: 2027.0 + throughput: 493.33991119881597 estimated_peak_memory_range: - min: 20480 - max: 1986112 + min: 0 + max: 330096592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp0z2yoe5 + job_id: jp14dj0np job_status: Passed torchscript_onnx_qnn: - inference_time: 2025.0 - throughput: 493.82716049382714 + inference_time: 2037.0 + throughput: 490.9180166912126 estimated_peak_memory_range: - min: 622592 - max: 1915568 + min: 634880 + max: 1825896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - 
job_id: j5q6r84mp + job_id: jp0z1jq05 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:19:49Z' + timestamp: '2024-11-09T22:18:21Z' + - torchscript_onnx_tflite: + inference_time: 3562.0 + throughput: 280.74115665356544 + estimated_peak_memory_range: + min: 16384 + max: 42077936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 114 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 114 + job_id: jgdxr3w6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3655.0 + throughput: 273.59781121751024 + estimated_peak_memory_range: + min: 663552 + max: 6567216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 188 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 188 + job_id: jp8q3x9qp + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:18:22Z' - torchscript_onnx_tflite: inference_time: 2802.0 throughput: 356.8879371877231 estimated_peak_memory_range: - min: 20480 - max: 131808576 + min: 16384 + max: 132896976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jprv8j60g + job_id: j57yj4zn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2904.0 - throughput: 344.3526170798898 + inference_time: 2934.0 + throughput: 340.83162917518746 estimated_peak_memory_range: - min: 618496 - max: 25606720 + min: 626688 + max: 26023088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jgo2kqzqp + job_id: jgkel4nvg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
QCS8450 Proxy - timestamp: '2024-10-26T23:28:57Z' + timestamp: '2024-11-09T22:18:23Z' - torchscript_onnx_qnn: - inference_time: 2213.0 - throughput: 451.8752824220515 + inference_time: 2208.0 + throughput: 452.8985507246377 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j5q6eo0np + job_id: jprv40kkg job_status: Passed torchscript_onnx: - inference_time: 2211.0 - throughput: 452.2840343735866 + inference_time: 2220.0 + throughput: 450.45045045045043 estimated_peak_memory_range: - min: 43016192 - max: 43016192 + min: 41680896 + max: 41680896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: j5we2n235 + job_id: jp3j493mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:29:03Z' + timestamp: '2024-11-09T22:18:27Z' diff --git a/qai_hub_models/models/regnet_quantized/README.md b/qai_hub_models/models/regnet_quantized/README.md index 6eb10e69..67d52298 100644 --- a/qai_hub_models/models/regnet_quantized/README.md +++ b/qai_hub_models/models/regnet_quantized/README.md @@ -5,8 +5,7 @@ RegNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of RegNetQuantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/regnet_quantized). 
diff --git a/qai_hub_models/models/regnet_quantized/perf.yaml b/qai_hub_models/models/regnet_quantized/perf.yaml index 93cb6152..b28652ee 100644 --- a/qai_hub_models/models/regnet_quantized/perf.yaml +++ b/qai_hub_models/models/regnet_quantized/perf.yaml @@ -46,64 +46,11 @@ models: - name: RegNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 906.0 - throughput: 1103.7527593818984 - estimated_peak_memory_range: - min: 12288 - max: 13363696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 116 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 116 - job_id: j5we32665 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1036.0 - throughput: 965.2509652509652 - estimated_peak_memory_range: - min: 16384 - max: 50324976 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 189 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 189 - job_id: jp0z23065 - job_status: Passed - torchscript_onnx: - inference_time: 1500.0 - throughput: 666.6666666666666 - estimated_peak_memory_range: - min: 16384 - max: 257965648 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 218 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 218 - job_id: j5we32e65 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:47:55Z' - - torchscript_onnx_tflite: - inference_time: 901.0 - throughput: 1109.8779134295228 + inference_time: 900.0 + throughput: 1111.111111111111 estimated_peak_memory_range: min: 16384 - max: 61696384 + max: 261410944 primary_compute_unit: NPU precision: int8 layer_info: @@ -111,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jg9lyjnlg + job_id: jgkelkkwg job_status: Passed torchscript_onnx_qnn: - inference_time: 1029.0 - throughput: 971.8172983479105 + inference_time: 1041.0 + 
throughput: 960.6147934678194 estimated_peak_memory_range: - min: 16384 - max: 51218880 + min: 12288 + max: 63372488 primary_compute_unit: NPU precision: int8 layer_info: @@ -126,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp8qm0yxp - job_status: Passed - torchscript_onnx: - inference_time: 1516.0 - throughput: 659.6306068601583 - estimated_peak_memory_range: - min: 12288 - max: 27046832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 218 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 218 - job_id: jg9lyjllg + job_id: jg9l3q7wg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -150,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:47:57Z' + timestamp: '2024-11-09T23:38:39Z' - torchscript_onnx_tflite: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: min: 16384 - max: 140301984 + max: 139832224 primary_compute_unit: NPU precision: int8 layer_info: @@ -164,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp14wyz2p + job_id: j5q67ddnp job_status: Passed torchscript_onnx_qnn: - inference_time: 743.0 - throughput: 1345.8950201884254 + inference_time: 735.0 + throughput: 1360.544217687075 estimated_peak_memory_range: - min: 0 - max: 31070064 + min: 163840 + max: 32390704 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgkeq7x2g - job_status: Passed - torchscript_onnx: - inference_time: 1094.0 - throughput: 914.0767824497258 - estimated_peak_memory_range: - min: 0 - max: 176139136 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 218 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 218 - job_id: jp14wy42p + job_id: jp14dmk8p job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -203,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:47:58Z' + timestamp: '2024-11-09T23:38:40Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 524.0 + throughput: 1908.3969465648854 estimated_peak_memory_range: min: 8192 - max: 69220704 + max: 69832272 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp2k90y4p + job_id: jglv0qqj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 617.0 - throughput: 1620.7455429497568 + inference_time: 763.0 + throughput: 1310.615989515072 estimated_peak_memory_range: - min: 159744 - max: 30870624 + min: 0 + max: 30066912 primary_compute_unit: NPU precision: int8 layer_info: @@ -232,22 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jpedwem15 - job_status: Passed - torchscript_onnx: - inference_time: 907.0 - throughput: 1102.5358324145534 - estimated_peak_memory_range: - min: 0 - max: 88946576 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 218 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 218 - job_id: j57yl0yl5 + job_id: jgdxrmyrp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -256,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:48:02Z' + timestamp: '2024-11-09T23:38:42Z' - torchscript_onnx_tflite: - inference_time: 30383.0 - throughput: 32.91314221768752 + inference_time: 29435.0 + throughput: 33.973161202649905 estimated_peak_memory_range: - min: 143360 - max: 75854768 + min: 61440 + max: 76230832 primary_compute_unit: GPU precision: int8 layer_info: @@ -270,14 +172,14 @@ models: layers_on_gpu: 116 layers_on_cpu: 0 total_layers: 116 - job_id: jgdxqe1ep + job_id: j56y3006p job_status: 
Passed torchscript_onnx_qnn: - inference_time: 4113.0 - throughput: 243.13153415998056 + inference_time: 4288.0 + throughput: 233.2089552238806 estimated_peak_memory_range: - min: 221184 - max: 8020128 + min: 12288 + max: 8294912 primary_compute_unit: NPU precision: int8 layer_info: @@ -285,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j5q6req4p + job_id: j57yj81v5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -294,13 +196,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:47:39Z' + timestamp: '2024-11-09T23:38:25Z' - torchscript_onnx_tflite: - inference_time: 36404.0 - throughput: 27.46950884518185 + inference_time: 40210.0 + throughput: 24.86943546381497 estimated_peak_memory_range: - min: 2772992 - max: 67381776 + min: 2752512 + max: 66426512 primary_compute_unit: GPU precision: int8 layer_info: @@ -308,7 +210,7 @@ models: layers_on_gpu: 91 layers_on_cpu: 13 total_layers: 116 - job_id: j57yl0rl5 + job_id: jp3j4rr3g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -317,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:47:20Z' + timestamp: '2024-11-09T23:38:07Z' - torchscript_onnx_tflite: - inference_time: 895.0 - throughput: 1117.31843575419 + inference_time: 880.0 + throughput: 1136.3636363636363 estimated_peak_memory_range: min: 12288 - max: 2446728 + max: 1304424 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,14 +233,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp4ldkrv5 + job_id: jgo2199qp job_status: Passed torchscript_onnx_qnn: - inference_time: 957.0 - throughput: 1044.932079414838 + inference_time: 948.0 + throughput: 1054.8523206751054 estimated_peak_memory_range: - min: 184320 - max: 1331640 + min: 180224 + max: 1558864 primary_compute_unit: NPU precision: int8 layer_info: @@ -346,7 +248,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 189 - job_id: jglv26m85 + job_id: jp4lx2685 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -355,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:47:40Z' + timestamp: '2024-11-09T23:38:26Z' - torchscript_onnx_tflite: - inference_time: 900.0 - throughput: 1111.111111111111 + inference_time: 894.0 + throughput: 1118.5682326621925 estimated_peak_memory_range: min: 12288 - max: 29974680 + max: 4678808 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,14 +271,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jgkeqnmwg + job_id: jpv61nyk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 979.0 + throughput: 1021.4504596527069 estimated_peak_memory_range: min: 184320 - max: 1785360 + max: 1841096 primary_compute_unit: NPU precision: int8 layer_info: @@ -384,7 +286,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp3j1323g + job_id: j5mnwl1dp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -393,13 +295,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:06:05Z' + timestamp: '2024-11-09T23:38:30Z' - torchscript_onnx_tflite: - inference_time: 897.0 - throughput: 1114.8272017837235 + inference_time: 909.0 + throughput: 1100.1100110011 estimated_peak_memory_range: min: 12288 - max: 1456848 + max: 6657416 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,14 +309,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: j5q6rkonp + job_id: jgjv086vg job_status: Passed torchscript_onnx_qnn: - inference_time: 969.0 - throughput: 1031.9917440660474 + inference_time: 976.0 + throughput: 1024.5901639344263 estimated_peak_memory_range: - min: 172032 - max: 1590120 + min: 167936 + max: 1497928 primary_compute_unit: NPU precision: int8 layer_info: 
@@ -422,7 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgo2n0qqp + job_id: jgn69wdk5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -431,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:06:07Z' + timestamp: '2024-11-09T23:38:32Z' - torchscript_onnx_tflite: - inference_time: 892.0 - throughput: 1121.0762331838564 + inference_time: 902.0 + throughput: 1108.6474501108648 estimated_peak_memory_range: - min: 16384 - max: 6099864 + min: 12288 + max: 19760880 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: j56yzjl6p + job_id: jpedrn0o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 965.0 - throughput: 1036.2694300518135 + inference_time: 961.0 + throughput: 1040.5827263267429 estimated_peak_memory_range: - min: 184320 - max: 1467096 + min: 180224 + max: 1545360 primary_compute_unit: NPU precision: int8 layer_info: @@ -460,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgjv24nvg + job_id: jprv47m0g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -469,13 +371,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:06:10Z' + timestamp: '2024-11-09T23:38:33Z' - torchscript_onnx_tflite: - inference_time: 1523.0 - throughput: 656.5988181221273 + inference_time: 1530.0 + throughput: 653.59477124183 estimated_peak_memory_range: min: 12288 - max: 70180848 + max: 70368496 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,14 +385,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jglv2zrj5 + job_id: jgz3x0qo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1899.0 - throughput: 526.592943654555 + inference_time: 1947.0 + throughput: 513.6106831022086 estimated_peak_memory_range: min: 0 - max: 5917648 + max: 
5839536 primary_compute_unit: NPU precision: int8 layer_info: @@ -498,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jpv6rxkk5 + job_id: jp2k7zqrp job_status: Passed reference_device_info: name: SA8295P ADP @@ -507,13 +409,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:06:09Z' + timestamp: '2024-11-09T23:38:35Z' - torchscript_onnx_tflite: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1034.0 + throughput: 967.1179883945841 estimated_peak_memory_range: - min: 0 - max: 142787600 + min: 12288 + max: 144801536 primary_compute_unit: NPU precision: int8 layer_info: @@ -521,14 +423,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jprv2839g + job_id: j5wedr035 job_status: Passed torchscript_onnx_qnn: - inference_time: 1198.0 - throughput: 834.7245409015025 + inference_time: 1231.0 + throughput: 812.3476848090983 estimated_peak_memory_range: min: 163840 - max: 34202336 + max: 30262672 primary_compute_unit: NPU precision: int8 layer_info: @@ -536,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgjv2znxg + job_id: jpy14yk8p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -545,13 +447,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:47:50Z' + timestamp: '2024-11-09T23:38:37Z' - torchscript_onnx_qnn: - inference_time: 1153.0 - throughput: 867.3026886383348 + inference_time: 1174.0 + throughput: 851.7887563884157 estimated_peak_memory_range: - min: 516096 - max: 516096 + min: 438272 + max: 438272 primary_compute_unit: NPU precision: int8 layer_info: @@ -559,22 +461,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j56yze40p - job_status: Passed - torchscript_onnx: - inference_time: 1548.0 - throughput: 645.9948320413437 - estimated_peak_memory_range: - min: 23367680 - max: 23367680 - primary_compute_unit: 
NPU - precision: int8 - layer_info: - layers_on_npu: 218 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 218 - job_id: jgdxqexep + job_id: jpxk7z835 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -583,4 +470,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:48:00Z' + timestamp: '2024-11-09T23:38:44Z' diff --git a/qai_hub_models/models/resnet101/README.md b/qai_hub_models/models/resnet101/README.md index c9f9189a..156475ef 100644 --- a/qai_hub_models/models/resnet101/README.md +++ b/qai_hub_models/models/resnet101/README.md @@ -5,8 +5,7 @@ ResNet101 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet101 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet101). 
diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index e401cdb1..cc97a728 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3421.0 - throughput: 292.3121894182987 + inference_time: 3394.0 + throughput: 294.6375957572186 estimated_peak_memory_range: min: 16384 - max: 103475552 + max: 1872776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpede3do5 + job_id: jglv0xj25 job_status: Passed torchscript_onnx_qnn: - inference_time: 3524.0 - throughput: 283.7684449489217 + inference_time: 3496.0 + throughput: 286.0411899313501 estimated_peak_memory_range: - min: 16384 - max: 156532768 + min: 12288 + max: 83134160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,67 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5mnqyndp + job_id: jg9l3m6mg job_status: Passed torchscript_onnx: - inference_time: 3513.0 - throughput: 284.6569883290635 - estimated_peak_memory_range: - min: 20480 - max: 1217659360 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 247 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 247 - job_id: j56yely6p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:28:16Z' - - torchscript_onnx_tflite: - inference_time: 3417.0 - throughput: 292.654375182909 - 
estimated_peak_memory_range: - min: 0 - max: 2291432 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jgz3ok3o5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3521.0 - throughput: 284.0102243680773 + inference_time: 3506.0 + throughput: 285.2253280091272 estimated_peak_memory_range: min: 16384 - max: 166884672 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 245 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 245 - job_id: jgn6l86k5 - job_status: Passed - torchscript_onnx: - inference_time: 3546.0 - throughput: 282.0078962210942 - estimated_peak_memory_range: - min: 12288 - max: 101936064 + max: 102240136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jp3jv2j3g + job_id: jpy14x90p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:28:17Z' + timestamp: '2024-11-09T22:17:41Z' - torchscript_onnx_tflite: - inference_time: 2800.0 - throughput: 357.14285714285717 + inference_time: 2424.0 + throughput: 412.54125412541254 estimated_peak_memory_range: min: 16384 - max: 119129312 + max: 120047984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5we2ne35 + job_id: j56y37knp job_status: Passed torchscript_onnx_qnn: - inference_time: 3000.0 - throughput: 333.3333333333333 + inference_time: 2509.0 + throughput: 398.5651654045436 estimated_peak_memory_range: min: 618496 - max: 36947888 + max: 36785296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jprv8jv0g + job_id: jp14djrnp job_status: Passed 
torchscript_onnx: - inference_time: 2954.0 - throughput: 338.52403520649966 + inference_time: 2557.0 + throughput: 391.08330074305826 estimated_peak_memory_range: min: 0 - max: 123289872 + max: 124349600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jgo2kq2qp + job_id: jp0z1jy05 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:28:18Z' + timestamp: '2024-11-09T22:17:42Z' - torchscript_onnx_tflite: - inference_time: 2349.0 - throughput: 425.7130693912303 + inference_time: 2354.0 + throughput: 424.8088360237893 estimated_peak_memory_range: min: 12288 - max: 43966192 + max: 44684144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpxkn2k35 + job_id: jp3j49ymg job_status: Passed torchscript_onnx_qnn: - inference_time: 2056.0 - throughput: 486.38132295719845 + inference_time: 2404.0 + throughput: 415.97337770382694 estimated_peak_memory_range: min: 0 - max: 33438544 + max: 33192912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jglv6rvj5 + job_id: jp4lxzev5 job_status: Passed torchscript_onnx: - inference_time: 2530.0 - throughput: 395.25691699604744 + inference_time: 2532.0 + throughput: 394.9447077409163 estimated_peak_memory_range: min: 0 - max: 46977552 + max: 47385920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jgjvz4dvg + job_id: jp8q3xoqp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: 
'2024-10-26T23:28:20Z' + timestamp: '2024-11-11T13:57:27Z' - torchscript_onnx_tflite: - inference_time: 3408.0 - throughput: 293.42723004694835 + inference_time: 3382.0 + throughput: 295.68302779420463 estimated_peak_memory_range: - min: 28672 - max: 2055104 + min: 16384 + max: 17287872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jg9ljelwg + job_id: jgo21rj1p job_status: Passed torchscript_onnx_qnn: - inference_time: 3283.0 - throughput: 304.5994517209869 + inference_time: 3279.0 + throughput: 304.9710277523635 estimated_peak_memory_range: - min: 679936 - max: 1870608 + min: 634880 + max: 1864480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp2k0nkrp + job_id: j57yj4qn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:28:08Z' + timestamp: '2024-11-09T22:17:33Z' - torchscript_onnx_tflite: - inference_time: 3408.0 - throughput: 293.42723004694835 + inference_time: 3405.0 + throughput: 293.68575624082234 estimated_peak_memory_range: min: 24576 - max: 1894560 + max: 1940712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5we3olj5 + job_id: jpv61djz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3304.0 - throughput: 302.6634382566586 + inference_time: 3275.0 + throughput: 305.3435114503817 estimated_peak_memory_range: - min: 626688 - max: 1807616 + min: 28672 + max: 1562224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgdxqwdlp + job_id: jpxk74w85 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,13 +276,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:19:25Z' + timestamp: '2024-11-09T22:17:35Z' - torchscript_onnx_tflite: - inference_time: 3389.0 - throughput: 295.0722927117144 + inference_time: 3404.0 + throughput: 293.7720329024677 estimated_peak_memory_range: min: 16384 - max: 1906496 + max: 2272424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jg9lyvzvg + job_id: jgjv07j1g job_status: Passed torchscript_onnx_qnn: - inference_time: 3349.0 - throughput: 298.59659599880564 + inference_time: 3314.0 + throughput: 301.75015087507546 estimated_peak_memory_range: - min: 2154496 - max: 3310680 + min: 647168 + max: 2378024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j57ylzor5 + job_id: j5mnwmj7p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:19:26Z' + timestamp: '2024-11-09T22:17:36Z' - torchscript_onnx_tflite: - inference_time: 3439.0 - throughput: 290.7822041291073 + inference_time: 3397.0 + throughput: 294.3773918163085 estimated_peak_memory_range: min: 20480 - max: 2119000 + max: 2089848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp14w0nlp + job_id: jpedrzj85 job_status: Passed torchscript_onnx_qnn: - inference_time: 3319.0 - throughput: 301.29557095510694 + inference_time: 3317.0 + throughput: 301.4772384684956 estimated_peak_memory_range: - min: 643072 - max: 1926112 + min: 634880 + max: 2274328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp4ldqel5 + job_id: jgn69njj5 job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:19:27Z' + timestamp: '2024-11-09T22:17:37Z' + - torchscript_onnx_tflite: + inference_time: 5710.0 + throughput: 175.13134851138355 + estimated_peak_memory_range: + min: 20480 + max: 24577904 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: jgz3xm145 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5719.0 + throughput: 174.85574401119078 + estimated_peak_memory_range: + min: 618496 + max: 6489088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jprv40zkg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:17:38Z' - torchscript_onnx_tflite: - inference_time: 4761.0 - throughput: 210.03990758244066 + inference_time: 4772.0 + throughput: 209.55574182732607 estimated_peak_memory_range: min: 12288 - max: 97165536 + max: 97537024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp4lk0l85 + job_id: j5wed7j45 job_status: Passed torchscript_onnx_qnn: - inference_time: 4838.0 - throughput: 206.69698222405952 + inference_time: 4883.0 + throughput: 204.7921359819783 estimated_peak_memory_range: - min: 638976 - max: 23185008 + min: 712704 + max: 26069008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5q6eo6np + job_id: jp2k7w26p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy 
- timestamp: '2024-10-26T23:28:14Z' + timestamp: '2024-11-09T22:17:40Z' - torchscript_onnx_qnn: - inference_time: 3495.0 - throughput: 286.1230329041488 + inference_time: 3469.0 + throughput: 288.2675122513693 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpy1r018p + job_id: jp4lx1z25 job_status: Passed torchscript_onnx: - inference_time: 3509.0 - throughput: 284.9814762040467 + inference_time: 3554.0 + throughput: 281.37310073157005 estimated_peak_memory_range: - min: 90726400 - max: 90726400 + min: 90669056 + max: 90669056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jpv60xqk5 + job_id: jgkel4zvg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:28:19Z' + timestamp: '2024-11-09T22:17:44Z' diff --git a/qai_hub_models/models/resnet101_quantized/README.md b/qai_hub_models/models/resnet101_quantized/README.md index 8c3c49d1..0e5574b1 100644 --- a/qai_hub_models/models/resnet101_quantized/README.md +++ b/qai_hub_models/models/resnet101_quantized/README.md @@ -5,8 +5,7 @@ ResNet101 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet101Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet101_quantized). 
diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index 258dde83..dc73b874 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -49,64 +49,11 @@ models: - name: ResNet101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1164.0 - throughput: 859.106529209622 - estimated_peak_memory_range: - min: 45056 - max: 12295504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 150 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 150 - job_id: jgo2nz0dp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1380.0 - throughput: 724.6376811594203 - estimated_peak_memory_range: - min: 16384 - max: 47213904 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 246 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 246 - job_id: j5mn6eyqp - job_status: Passed - torchscript_onnx: - inference_time: 2258.0 - throughput: 442.8697962798937 - estimated_peak_memory_range: - min: 12288 - max: 52832152 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jgo2nzqdp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:46:45Z' - - torchscript_onnx_tflite: - inference_time: 1154.0 - throughput: 866.5511265164645 + inference_time: 1162.0 + throughput: 860.5851979345955 estimated_peak_memory_range: - min: 20480 - max: 2063480 + min: 28672 + max: 1372120 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpv6rqom5 + job_id: j5mnwlkdp job_status: Passed torchscript_onnx_qnn: - inference_time: 1378.0 - throughput: 725.6894049346879 
+ inference_time: 1384.0 + throughput: 722.543352601156 estimated_peak_memory_range: - min: 16384 - max: 47056320 + min: 40960 + max: 57206552 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgn6m08m5 - job_status: Passed - torchscript_onnx: - inference_time: 2237.0 - throughput: 447.02726866338844 - estimated_peak_memory_range: - min: 12288 - max: 52703104 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jpv6rqxm5 + job_id: jgo219wqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:46:46Z' + timestamp: '2024-11-09T23:37:29Z' - torchscript_onnx_tflite: - inference_time: 880.0 - throughput: 1136.3636363636363 + inference_time: 873.0 + throughput: 1145.475372279496 estimated_peak_memory_range: min: 12288 - max: 101903296 + max: 102769696 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgjv2dm8g + job_id: jgn69wqk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: min: 167936 - max: 22493136 + max: 26490064 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jprv26jeg - job_status: Passed - torchscript_onnx: - inference_time: 1968.0 - throughput: 508.130081300813 - estimated_peak_memory_range: - min: 0 - max: 153159328 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jgjv2d48g + job_id: jpv61nnk5 job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:46:48Z' + timestamp: '2024-11-09T23:37:30Z' - torchscript_onnx_tflite: - inference_time: 819.0 - throughput: 1221.001221001221 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: min: 8192 - max: 31737200 + max: 31567584 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp4ldn0l5 + job_id: jp2k7zdrp job_status: Passed torchscript_onnx_qnn: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 884.0 + throughput: 1131.2217194570135 estimated_peak_memory_range: - min: 159744 - max: 25213152 + min: 0 + max: 23826400 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j56yz2l7p - job_status: Passed - torchscript_onnx: - inference_time: 1577.0 - throughput: 634.1154090044388 - estimated_peak_memory_range: - min: 90112 - max: 62339856 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jgz3j2k65 + job_id: jpxk7w015 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:46:51Z' + timestamp: '2024-11-11T13:57:38Z' - torchscript_onnx_tflite: - inference_time: 4480.0 - throughput: 223.21428571428572 + inference_time: 4441.0 + throughput: 225.17451024544022 estimated_peak_memory_range: - min: 36864 - max: 36463040 + min: 12288 + max: 36615520 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpedwo105 + job_id: jpy14y28p job_status: Passed 
torchscript_onnx_qnn: - inference_time: 6562.0 - throughput: 152.39256324291375 + inference_time: 6992.0 + throughput: 143.02059496567506 estimated_peak_memory_range: - min: 192512 - max: 7667280 + min: 610304 + max: 8656272 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp2k9xnmp + job_id: jpedrnno5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:46:28Z' + timestamp: '2024-11-09T23:37:14Z' - torchscript_onnx_tflite: - inference_time: 17079.0 - throughput: 58.5514374377891 + inference_time: 17119.0 + throughput: 58.41462702260646 estimated_peak_memory_range: - min: 57344 - max: 2640928 + min: 65536 + max: 2589472 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgz3j2965 + job_id: jp0z1x995 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:46:08Z' + timestamp: '2024-11-09T23:36:56Z' - torchscript_onnx_tflite: - inference_time: 1156.0 - throughput: 865.0519031141869 + inference_time: 1159.0 + throughput: 862.8127696289905 estimated_peak_memory_range: - min: 20480 - max: 8980672 + min: 12288 + max: 1600200 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j5we3wvj5 + job_id: jp8q3krkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1319.0 - throughput: 758.1501137225171 + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: - min: 192512 - max: 1396656 + min: 229376 + max: 1343640 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 246 - job_id: jpy1jz04p + job_id: jgz3x00o5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:46:30Z' + timestamp: '2024-11-09T23:37:16Z' - torchscript_onnx_tflite: - inference_time: 1153.0 - throughput: 867.3026886383348 + inference_time: 1160.0 + throughput: 862.0689655172414 estimated_peak_memory_range: min: 28672 - max: 5268488 + max: 392731816 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp14wlx8p + job_id: jgkelk0wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1333.0 + throughput: 750.1875468867216 estimated_peak_memory_range: - min: 217088 - max: 1554352 + min: 176128 + max: 1421560 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpxk6j235 + job_id: jg9l3qqwg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:05:34Z' + timestamp: '2024-11-09T23:37:20Z' - torchscript_onnx_tflite: - inference_time: 1162.0 - throughput: 860.5851979345955 + inference_time: 1164.0 + throughput: 859.106529209622 estimated_peak_memory_range: - min: 24576 - max: 1483208 + min: 12288 + max: 15061408 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgdxq9lrp + job_id: j5q67d1np job_status: Passed torchscript_onnx_qnn: - inference_time: 1330.0 - throughput: 751.8796992481203 + inference_time: 1323.0 + throughput: 755.8578987150415 estimated_peak_memory_range: min: 180224 - max: 1740848 + max: 1319504 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j5mn62ydp + job_id: jp14dmm8p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:05:35Z' + timestamp: '2024-11-09T23:37:22Z' - torchscript_onnx_tflite: - inference_time: 1160.0 - throughput: 862.0689655172414 + inference_time: 1168.0 + throughput: 856.1643835616438 estimated_peak_memory_range: - min: 28672 - max: 401472312 + min: 16384 + max: 22161000 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp4ldo085 + job_id: jglv0q8j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: - min: 163840 - max: 1929760 + min: 184320 + max: 1455184 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jprv2qj0g + job_id: jgdxrmmrp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:05:39Z' + timestamp: '2024-11-09T23:37:23Z' - torchscript_onnx_tflite: - inference_time: 1773.0 - throughput: 564.0157924421884 + inference_time: 1774.0 + throughput: 563.6978579481398 estimated_peak_memory_range: min: 32768 - max: 29752064 + max: 31521328 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j57ylw3v5 + job_id: j56y30m6p job_status: Passed torchscript_onnx_qnn: - inference_time: 2207.0 - throughput: 453.10376076121435 + inference_time: 2076.0 + throughput: 481.6955684007707 estimated_peak_memory_range: 
min: 0 - max: 5712592 + max: 5868352 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgn6my8k5 + job_id: j57yj88v5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:05:37Z' + timestamp: '2024-11-09T23:37:25Z' - torchscript_onnx_tflite: - inference_time: 1360.0 - throughput: 735.2941176470588 + inference_time: 1365.0 + throughput: 732.6007326007326 estimated_peak_memory_range: min: 16384 - max: 105448944 + max: 103983728 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j57yl23r5 + job_id: jp3j4r73g job_status: Passed torchscript_onnx_qnn: - inference_time: 1596.0 - throughput: 626.5664160401003 + inference_time: 1612.0 + throughput: 620.3473945409429 estimated_peak_memory_range: - min: 167936 - max: 24443936 + min: 163840 + max: 24126128 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jglv24rl5 + job_id: jp4lx2285 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:46:40Z' + timestamp: '2024-11-09T23:37:27Z' - torchscript_onnx_qnn: - inference_time: 1328.0 - throughput: 753.0120481927711 + inference_time: 1313.0 + throughput: 761.6146230007616 estimated_peak_memory_range: - min: 487424 - max: 487424 + min: 360448 + max: 360448 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp0z247e5 - job_status: Passed - torchscript_onnx: - inference_time: 2334.0 - throughput: 428.4490145672665 - estimated_peak_memory_range: - min: 48701440 - max: 
48701440 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jpedwo305 + job_id: j5wedrr35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:46:49Z' + timestamp: '2024-11-09T23:37:34Z' diff --git a/qai_hub_models/models/resnet18/README.md b/qai_hub_models/models/resnet18/README.md index 299ae472..6929c3c6 100644 --- a/qai_hub_models/models/resnet18/README.md +++ b/qai_hub_models/models/resnet18/README.md @@ -5,8 +5,7 @@ ResNet18 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet18 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet18). 
diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index 8fd325f8..0ea8517c 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,70 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ResNet18 performance_metrics: - - torchscript_onnx_tflite: - inference_time: 1384.0 - throughput: 722.543352601156 - estimated_peak_memory_range: - min: 28672 - max: 1684672 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 38 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 38 - job_id: jpede1315 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1473.0 - throughput: 678.8866259334691 - estimated_peak_memory_range: - min: 16384 - max: 92762712 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 53 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 53 - job_id: jgdxel1rp - job_status: Passed - torchscript_onnx: - inference_time: 1355.0 - throughput: 738.0073800738007 - estimated_peak_memory_range: - min: 20480 - max: 26827152 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 55 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 55 - job_id: jp8q0vykp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:27:30Z' - torchscript_onnx_tflite: inference_time: 1383.0 throughput: 723.0657989877079 estimated_peak_memory_range: - min: 32768 - max: 2515224 + min: 28672 + max: 5786216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - 
job_id: jgz3o9kk5 + job_id: jglv0xdm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1463.0 - throughput: 683.526999316473 + inference_time: 1469.0 + throughput: 680.7351940095303 estimated_peak_memory_range: - min: 344064 - max: 92858240 + min: 335872 + max: 82307328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j57y03rv5 + job_id: jg9l3mr8g job_status: Passed torchscript_onnx: - inference_time: 1334.0 - throughput: 749.6251874062968 + inference_time: 1352.0 + throughput: 739.6449704142012 estimated_peak_memory_range: - min: 28672 - max: 26014264 + min: 12288 + max: 26210240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgke7mxwg + job_id: j5mnwm77p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:27:31Z' + timestamp: '2024-11-09T22:16:54Z' - torchscript_onnx_tflite: - inference_time: 1077.0 - throughput: 928.5051067780872 + inference_time: 968.0 + throughput: 1033.0578512396694 estimated_peak_memory_range: min: 16384 - max: 29004512 + max: 29877872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5we2n665 + job_id: j56y37xyp job_status: Passed torchscript_onnx_qnn: - inference_time: 1114.0 - throughput: 897.6660682226212 + inference_time: 1007.0 + throughput: 993.0486593843099 estimated_peak_memory_range: min: 618496 - max: 13877984 + max: 14338176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp4lk0r85 + job_id: jp14dj97p job_status: Passed torchscript_onnx: - inference_time: 1144.0 - throughput: 874.1258741258741 + 
inference_time: 966.0 + throughput: 1035.1966873706003 estimated_peak_memory_range: min: 0 - max: 29883088 + max: 30908144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: j5q6eoqnp + job_id: jgn69n4j5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:27:32Z' + timestamp: '2024-11-09T22:16:55Z' - torchscript_onnx_tflite: - inference_time: 956.0 - throughput: 1046.0251046025105 + inference_time: 959.0 + throughput: 1042.752867570386 estimated_peak_memory_range: min: 12288 - max: 16447328 + max: 16323104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jp14yxz8p + job_id: jp3j49dng job_status: Passed torchscript_onnx_qnn: - inference_time: 853.0 - throughput: 1172.3329425556858 + inference_time: 871.0 + throughput: 1148.105625717566 estimated_peak_memory_range: - min: 614400 - max: 11094960 + min: 0 + max: 11067408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp0z37095 + job_id: jgdxr3kzp job_status: Passed torchscript_onnx: - inference_time: 968.0 - throughput: 1033.0578512396694 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: min: 0 - max: 16001328 + max: 15787600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: j56yel46p + job_id: jprv40rkg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:27:34Z' + timestamp: '2024-11-09T22:16:56Z' - torchscript_onnx_tflite: - 
inference_time: 1379.0 - throughput: 725.1631617113851 + inference_time: 1382.0 + throughput: 723.589001447178 estimated_peak_memory_range: - min: 28672 - max: 1398944 + min: 0 + max: 1506736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jg9ljenlg + job_id: jgo21rxkp job_status: Passed torchscript_onnx_qnn: inference_time: 1318.0 throughput: 758.7253414264036 estimated_peak_memory_range: - min: 626688 - max: 2069456 + min: 630784 + max: 1748800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jpxkn2o35 + job_id: j5wed7k45 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:27:22Z' + timestamp: '2024-11-09T22:16:47Z' - torchscript_onnx_tflite: inference_time: 1384.0 throughput: 722.543352601156 estimated_peak_memory_range: - min: 12288 - max: 2322080 + min: 24576 + max: 1864432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5q6r8lmp + job_id: jpv61d8r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1324.0 - throughput: 755.2870090634441 + inference_time: 1331.0 + throughput: 751.3148009015778 estimated_peak_memory_range: min: 634880 - max: 2006760 + max: 1953384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp3j1kzzg + job_id: jp14dj9np job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:19:01Z' + timestamp: '2024-11-09T22:16:49Z' - torchscript_onnx_tflite: - inference_time: 1386.0 - throughput: 721.5007215007215 
+ inference_time: 1385.0 + throughput: 722.0216606498195 estimated_peak_memory_range: - min: 40960 - max: 1660856 + min: 28672 + max: 1434864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jglv2nyl5 + job_id: jgjv079eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1325.0 - throughput: 754.7169811320755 + inference_time: 1316.0 + throughput: 759.8784194528876 estimated_peak_memory_range: - min: 630784 - max: 1815504 + min: 634880 + max: 1936216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jgo2nyldp + job_id: jgdxr3k6p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:19:02Z' + timestamp: '2024-11-09T22:16:50Z' - torchscript_onnx_tflite: - inference_time: 1385.0 - throughput: 722.0216606498195 + inference_time: 1383.0 + throughput: 723.0657989877079 estimated_peak_memory_range: - min: 221184 - max: 247239088 + min: 16384 + max: 2483200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j56yz687p + job_id: jpedrzqv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1325.0 - throughput: 754.7169811320755 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: - min: 0 - max: 1546576 + min: 626688 + max: 2015984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jpv6r3lm5 + job_id: j57yj4mn5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:19:03Z' + timestamp: 
'2024-11-09T22:16:51Z' - torchscript_onnx_tflite: - inference_time: 1946.0 - throughput: 513.874614594039 + inference_time: 2489.0 + throughput: 401.76777822418643 estimated_peak_memory_range: - min: 16384 - max: 28946416 + min: 12288 + max: 14853840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +366,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jg9ljenwg + job_id: jgz3xm6x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1979.0 - throughput: 505.3057099545225 + inference_time: 2451.0 + throughput: 407.9967360261118 + estimated_peak_memory_range: + min: 0 + max: 5967152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 53 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 53 + job_id: jp4lx1725 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:16:52Z' + - torchscript_onnx_tflite: + inference_time: 1950.0 + throughput: 512.8205128205128 + estimated_peak_memory_range: + min: 12288 + max: 29158080 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 38 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 38 + job_id: j5wed7km5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1988.0 + throughput: 503.01810865191146 estimated_peak_memory_range: min: 618496 - max: 18157312 + max: 16730944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jpy1r038p + job_id: jpxk74q85 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:27:28Z' + timestamp: '2024-11-09T22:16:53Z' - torchscript_onnx_qnn: - inference_time: 1438.0 - throughput: 695.4102920723227 + inference_time: 1442.0 + throughput: 
693.4812760055479 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j5mnqyxdp + job_id: jg9l3mrmg job_status: Passed torchscript_onnx: - inference_time: 1325.0 - throughput: 754.7169811320755 + inference_time: 1334.0 + throughput: 749.6251874062968 estimated_peak_memory_range: - min: 24285184 - max: 24285184 + min: 24240128 + max: 24240128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jglv6rmj5 + job_id: jp2k7w16p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:27:33Z' + timestamp: '2024-11-09T22:16:57Z' diff --git a/qai_hub_models/models/resnet18_quantized/README.md b/qai_hub_models/models/resnet18_quantized/README.md index 907fffb5..549efa42 100644 --- a/qai_hub_models/models/resnet18_quantized/README.md +++ b/qai_hub_models/models/resnet18_quantized/README.md @@ -5,8 +5,7 @@ ResNet18 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet18Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet18_quantized). 
diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index 23ec3d42..401cb6e9 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -49,64 +49,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 410.0 - throughput: 2439.0243902439024 - estimated_peak_memory_range: - min: 16384 - max: 6811696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 41 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 41 - job_id: jp2k9x8mp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 - estimated_peak_memory_range: - min: 16384 - max: 18715928 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 54 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 54 - job_id: jpedwo905 - job_status: Passed - torchscript_onnx: - inference_time: 679.0 - throughput: 1472.7540500736377 - estimated_peak_memory_range: - min: 16384 - max: 13961904 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jp2k9x6mp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:45:31Z' - - torchscript_onnx_tflite: - inference_time: 408.0 - throughput: 2450.9803921568628 + inference_time: 406.0 + throughput: 2463.054187192118 estimated_peak_memory_range: min: 12288 - max: 1331736 + max: 61897168 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpy1jze4p + job_id: jgdxrm6ep job_status: Passed torchscript_onnx_qnn: - inference_time: 624.0 - throughput: 1602.5641025641025 + inference_time: 632.0 + 
throughput: 1582.2784810126582 estimated_peak_memory_range: min: 16384 - max: 8169464 + max: 8166816 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgz3j2e65 - job_status: Passed - torchscript_onnx: - inference_time: 673.0 - throughput: 1485.8841010401188 - estimated_peak_memory_range: - min: 69632 - max: 1608928 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jpy1jzw4p + job_id: jp2k7zorp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:45:33Z' + timestamp: '2024-11-09T23:36:16Z' - torchscript_onnx_tflite: - inference_time: 312.0 - throughput: 3205.128205128205 + inference_time: 320.0 + throughput: 3125.0 estimated_peak_memory_range: - min: 16384 - max: 27489904 + min: 12288 + max: 28081344 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp0z24ye5 + job_id: j5wedry35 job_status: Passed torchscript_onnx_qnn: inference_time: 476.0 throughput: 2100.840336134454 estimated_peak_memory_range: min: 163840 - max: 13261248 + max: 12874960 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j5we3woj5 - job_status: Passed - torchscript_onnx: - inference_time: 502.0 - throughput: 1992.03187250996 - estimated_peak_memory_range: - min: 12288 - max: 35072448 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jp0z24qe5 + job_id: jpy14y88p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: 
Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:45:34Z' + timestamp: '2024-11-09T23:36:17Z' - torchscript_onnx_tflite: - inference_time: 269.0 - throughput: 3717.472118959108 + inference_time: 292.0 + throughput: 3424.6575342465753 estimated_peak_memory_range: - min: 8192 - max: 16154624 + min: 12288 + max: 16028640 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpv6rq3m5 + job_id: jg9l3qowg job_status: Passed torchscript_onnx_qnn: - inference_time: 384.0 - throughput: 2604.1666666666665 + inference_time: 450.0 + throughput: 2222.222222222222 estimated_peak_memory_range: - min: 159744 - max: 10308272 + min: 0 + max: 10881776 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgn6m0ym5 - job_status: Passed - torchscript_onnx: - inference_time: 464.0 - throughput: 2155.1724137931033 - estimated_peak_memory_range: - min: 0 - max: 20259728 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 74 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jgkeqvnog + job_id: jp8q3kjkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:45:38Z' + timestamp: '2024-11-09T23:36:19Z' - torchscript_onnx_tflite: - inference_time: 1649.0 - throughput: 606.4281382656155 + inference_time: 1573.0 + throughput: 635.7279084551811 estimated_peak_memory_range: min: 12288 - max: 18000576 + max: 17443952 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp8qm2o8p + job_id: jp14dmo8p job_status: Passed torchscript_onnx_qnn: - inference_time: 2017.0 - throughput: 495.785820525533 + inference_time: 2140.0 + throughput: 
467.2897196261682 estimated_peak_memory_range: - min: 32768 - max: 8416880 + min: 163840 + max: 8460960 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jg9ly0vvg + job_id: jgkelk6wg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:45:14Z' + timestamp: '2024-11-09T23:36:00Z' - torchscript_onnx_tflite: - inference_time: 7141.0 - throughput: 140.0364094664613 + inference_time: 7062.0 + throughput: 141.6029453412631 estimated_peak_memory_range: - min: 65536 - max: 2452072 + min: 36864 + max: 7375936 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgkeqvzog + job_id: jgdxrm6rp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:44:55Z' + timestamp: '2024-11-09T23:35:42Z' - torchscript_onnx_tflite: - inference_time: 405.0 - throughput: 2469.135802469136 + inference_time: 408.0 + throughput: 2450.9803921568628 estimated_peak_memory_range: min: 12288 - max: 1376544 + max: 1251976 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5q6r08mp + job_id: j57yj8ov5 job_status: Passed torchscript_onnx_qnn: - inference_time: 598.0 - throughput: 1672.2408026755852 + inference_time: 599.0 + throughput: 1669.449081803005 estimated_peak_memory_range: - min: 184320 - max: 1393280 + min: 200704 + max: 1874544 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp14w20lp + job_id: j5q67d4np job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ 
-358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:45:16Z' + timestamp: '2024-11-09T23:36:02Z' - torchscript_onnx_tflite: - inference_time: 396.0 - throughput: 2525.252525252525 + inference_time: 406.0 + throughput: 2463.054187192118 estimated_peak_memory_range: min: 12288 - max: 28821024 + max: 1362768 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp3j133lg + job_id: jp4lx2e85 job_status: Passed torchscript_onnx_qnn: - inference_time: 599.0 - throughput: 1669.449081803005 + inference_time: 604.0 + throughput: 1655.6291390728477 estimated_peak_memory_range: - min: 184320 - max: 1482648 + min: 217088 + max: 1462592 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jpedw1115 + job_id: j56y30o6p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:05:00Z' + timestamp: '2024-11-09T23:36:06Z' - torchscript_onnx_tflite: - inference_time: 413.0 - throughput: 2421.3075060532688 + inference_time: 401.0 + throughput: 2493.7655860349128 estimated_peak_memory_range: - min: 12288 - max: 15096672 + min: 16384 + max: 1347352 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgo2n00xp + job_id: jpxk7z035 job_status: Passed torchscript_onnx_qnn: - inference_time: 604.0 - throughput: 1655.6291390728477 + inference_time: 598.0 + throughput: 1672.2408026755852 estimated_peak_memory_range: - min: 184320 - max: 1922400 + min: 180224 + max: 1947192 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgz3j99k5 + job_id: 
jp3j4ro3g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:05:01Z' + timestamp: '2024-11-09T23:36:08Z' - torchscript_onnx_tflite: - inference_time: 407.0 - throughput: 2457.002457002457 + inference_time: 410.0 + throughput: 2439.0243902439024 estimated_peak_memory_range: - min: 32768 - max: 11009304 + min: 12288 + max: 1275872 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgjv2mmxg + job_id: j5mnwl9dp job_status: Passed torchscript_onnx_qnn: - inference_time: 602.0 - throughput: 1661.1295681063123 + inference_time: 601.0 + throughput: 1663.8935108153078 estimated_peak_memory_range: - min: 53248 - max: 1185672 + min: 208896 + max: 1558072 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jg9ly1elg + job_id: jgo219dqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:05:05Z' + timestamp: '2024-11-09T23:36:10Z' - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 795.0 + throughput: 1257.861635220126 estimated_peak_memory_range: min: 12288 - max: 15631792 + max: 15628224 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpv6rooj5 + job_id: jgn69w1k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1105.0 - throughput: 904.9773755656108 + inference_time: 1084.0 + throughput: 922.509225092251 estimated_peak_memory_range: min: 0 - max: 5939312 + max: 5893552 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 54 - job_id: j5we3vn65 + job_id: jpv61nmk5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:05:03Z' + timestamp: '2024-11-09T23:36:12Z' - torchscript_onnx_tflite: - inference_time: 469.0 - throughput: 2132.1961620469083 + inference_time: 475.0 + throughput: 2105.2631578947367 estimated_peak_memory_range: min: 16384 - max: 28217472 + max: 27186432 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgo2nzydp + job_id: jprv47x0g job_status: Passed torchscript_onnx_qnn: inference_time: 700.0 throughput: 1428.5714285714287 estimated_peak_memory_range: min: 163840 - max: 14151968 + max: 14250816 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j5mn6e2qp + job_id: jgjv08yvg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:45:26Z' + timestamp: '2024-11-09T23:36:14Z' - torchscript_onnx_qnn: - inference_time: 687.0 - throughput: 1455.604075691412 + inference_time: 685.0 + throughput: 1459.85401459854 estimated_peak_memory_range: - min: 610304 - max: 610304 + min: 425984 + max: 425984 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgdxqnwlp + job_id: jglv0qwj5 job_status: Passed torchscript_onnx: - inference_time: 743.0 - throughput: 1345.8950201884254 + inference_time: 104850.0 + throughput: 9.537434430138292 estimated_peak_memory_range: - min: 13533184 - max: 13533184 + min: 121454592 + max: 121454592 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 74 + layers_on_npu: 143 
layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 74 - job_id: jp8qm298p + layers_on_cpu: 20 + total_layers: 163 + job_id: jg9l3q2wg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +488,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:45:36Z' + timestamp: '2024-11-09T23:36:20Z' diff --git a/qai_hub_models/models/resnet50/README.md b/qai_hub_models/models/resnet50/README.md index 38950979..f24b2ab4 100644 --- a/qai_hub_models/models/resnet50/README.md +++ b/qai_hub_models/models/resnet50/README.md @@ -5,8 +5,7 @@ ResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet50). 
diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index 014e0e3f..3b7a2774 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2283.0 - throughput: 438.02014892685065 + inference_time: 2278.0 + throughput: 438.98156277436345 estimated_peak_memory_range: - min: 16384 - max: 2068880 + min: 24576 + max: 1880392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j56yejj0p + job_id: jp0z1jxn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2381.0 - throughput: 419.99160016799664 + inference_time: 2391.0 + throughput: 418.23504809703053 estimated_peak_memory_range: - min: 24576 - max: 181455832 + min: 622592 + max: 181640584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14ylx2p + job_id: jgjv076eg job_status: Passed torchscript_onnx: - inference_time: 2310.0 - throughput: 432.9004329004329 + inference_time: 2312.0 + throughput: 432.52595155709344 estimated_peak_memory_range: - min: 360448 - max: 2174840 + min: 532480 + max: 2387656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp8q09vxp + job_id: j5mnwm19p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:26:49Z' + timestamp: 
'2024-11-09T22:16:13Z' - torchscript_onnx_tflite: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 1605.0 + throughput: 623.0529595015577 estimated_peak_memory_range: min: 16384 - max: 2462728 + max: 80566672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp3jv33lg + job_id: jp8q3xkop job_status: Passed torchscript_onnx_qnn: - inference_time: 2394.0 - throughput: 417.7109440267335 + inference_time: 1701.0 + throughput: 587.8894767783656 estimated_peak_memory_range: - min: 618496 - max: 182169320 + min: 0 + max: 28072944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdxe9lep + job_id: jpedrz0v5 job_status: Passed torchscript_onnx: - inference_time: 2353.0 - throughput: 424.9893752656184 + inference_time: 1702.0 + throughput: 587.5440658049354 estimated_peak_memory_range: - min: 12288 - max: 60455176 + min: 618496 + max: 84054640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgke7nm2g + job_id: jgn69ndq5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:26:50Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:16:14Z' - torchscript_onnx_tflite: - inference_time: 1782.0 - throughput: 561.1672278338945 + inference_time: 1548.0 + throughput: 645.9948320413437 estimated_peak_memory_range: - min: 16384 - max: 80124432 + min: 12288 + max: 31195280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgo2k00xp + job_id: jgkel4kng job_status: Passed torchscript_onnx_qnn: - 
inference_time: 1876.0 - throughput: 533.0490405117271 + inference_time: 1669.0 + throughput: 599.1611743559017 estimated_peak_memory_range: - min: 618496 - max: 25902432 + min: 0 + max: 23159552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp4lko0v5 + job_id: jgz3xmqx5 job_status: Passed torchscript_onnx: - inference_time: 1962.0 - throughput: 509.683995922528 + inference_time: 1651.0 + throughput: 605.6935190793458 estimated_peak_memory_range: min: 0 - max: 81828304 + max: 31162640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5q6eko4p + job_id: jprv40m7g job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:26:51Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:16:15Z' - torchscript_onnx_tflite: - inference_time: 1576.0 - throughput: 634.5177664974619 + inference_time: 2253.0 + throughput: 443.85264092321347 estimated_peak_memory_range: - min: 12288 - max: 31360112 + min: 24576 + max: 1971424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jg9lj1elg + job_id: j5q67ydop job_status: Passed torchscript_onnx_qnn: - inference_time: 1402.0 - throughput: 713.2667617689016 + inference_time: 2163.0 + throughput: 462.32085067036525 estimated_peak_memory_range: - min: 0 - max: 24001104 + min: 638976 + max: 1873600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp0z3q765 - job_status: Passed - torchscript_onnx: - inference_time: 1655.0 - throughput: 
604.2296072507553 - estimated_peak_memory_range: - min: 0 - max: 31966384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: j56yejl0p + job_id: j5wed70m5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:26:53Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:16:06Z' - torchscript_onnx_tflite: - inference_time: 2253.0 - throughput: 443.85264092321347 + inference_time: 2272.0 + throughput: 440.14084507042253 estimated_peak_memory_range: - min: 151552 - max: 1616056 + min: 16384 + max: 2076856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpv60ooj5 + job_id: jglv0x9m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2166.0 - throughput: 461.6805170821791 + inference_time: 2180.0 + throughput: 458.7155963302752 estimated_peak_memory_range: - min: 634880 - max: 1977936 + min: 28672 + max: 1476232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpxknj215 + job_id: jp14djk7p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:26:41Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:16:08Z' - torchscript_onnx_tflite: - inference_time: 2271.0 - throughput: 440.33465433729634 + inference_time: 2276.0 + throughput: 439.3673110720562 estimated_peak_memory_range: - min: 0 - max: 6635952 + min: 28672 + max: 1960960 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpxk6vl95 + job_id: j56y379yp job_status: Passed torchscript_onnx_qnn: - inference_time: 2179.0 - throughput: 458.9261128958238 + inference_time: 2172.0 + throughput: 460.4051565377532 estimated_peak_memory_range: min: 634880 - max: 2255072 + max: 2199984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jprv2kleg + job_id: jgdxr3yzp job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:18:41Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:16:09Z' - torchscript_onnx_tflite: - inference_time: 2279.0 - throughput: 438.7889425186485 + inference_time: 2275.0 + throughput: 439.56043956043953 estimated_peak_memory_range: - min: 16384 - max: 2365696 + min: 20480 + max: 1881320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5mn6r0qp + job_id: jp3j49lng job_status: Passed torchscript_onnx_qnn: - inference_time: 2181.0 - throughput: 458.50527281063734 + inference_time: 2177.0 + throughput: 459.34772622875516 estimated_peak_memory_range: min: 634880 - max: 2124032 + max: 1855600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp2k98rmp + job_id: j57yj4195 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:18:42Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:16:10Z' - torchscript_onnx_tflite: - inference_time: 2279.0 - throughput: 438.7889425186485 + inference_time: 3766.0 + 
throughput: 265.5337227827934 estimated_peak_memory_range: min: 16384 - max: 1921296 + max: 21584640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgn6m2zm5 + job_id: jgo21r7kp job_status: Passed torchscript_onnx_qnn: - inference_time: 2179.0 - throughput: 458.9261128958238 + inference_time: 3872.0 + throughput: 258.26446280991735 estimated_peak_memory_range: - min: 655360 - max: 1809848 + min: 618496 + max: 6517488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpy1jeo4p + job_id: jp4lx1615 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:18:43Z' + chipset: SA8295P + timestamp: '2024-11-09T22:16:11Z' - torchscript_onnx_tflite: - inference_time: 3079.0 - throughput: 324.78077297823967 + inference_time: 3085.0 + throughput: 324.14910858995137 estimated_peak_memory_range: min: 16384 - max: 67143744 + max: 67449744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5we2vn65 + job_id: jpv61dyr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3165.0 - throughput: 315.955766192733 + inference_time: 3137.0 + throughput: 318.77590054191904 estimated_peak_memory_range: - min: 647168 - max: 18326992 + min: 618496 + max: 18979856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpy1rw07p + job_id: jpxk748l5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:26:47Z' + timestamp: 
'2024-11-09T22:16:12Z' - torchscript_onnx_qnn: - inference_time: 2327.0 - throughput: 429.7378599054577 + inference_time: 2320.0 + throughput: 431.0344827586207 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5mnq2ywp + job_id: jg9l3m78g job_status: Passed torchscript_onnx: - inference_time: 2335.0 - throughput: 428.2655246252677 + inference_time: 2357.0 + throughput: 424.26813746287655 estimated_peak_memory_range: - min: 52412416 - max: 52412416 + min: 53592064 + max: 53592064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jglv6zr85 + job_id: jp2k7wqqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:26:52Z' + timestamp: '2024-11-09T22:16:16Z' diff --git a/qai_hub_models/models/resnet50_quantized/README.md b/qai_hub_models/models/resnet50_quantized/README.md index 726fb4b4..30716929 100644 --- a/qai_hub_models/models/resnet50_quantized/README.md +++ b/qai_hub_models/models/resnet50_quantized/README.md @@ -5,8 +5,7 @@ ResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNet50Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnet50_quantized). 
diff --git a/qai_hub_models/models/resnet50_quantized/perf.yaml b/qai_hub_models/models/resnet50_quantized/perf.yaml index 1fcca721..2b0c7fb1 100644 --- a/qai_hub_models/models/resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/resnet50_quantized/perf.yaml @@ -46,64 +46,11 @@ models: - name: ResNet50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 785.0 - throughput: 1273.8853503184714 - estimated_peak_memory_range: - min: 40960 - max: 1377744 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 82 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 82 - job_id: jg9ly0rvg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1001.0 - throughput: 999.000999000999 - estimated_peak_memory_range: - min: 12288 - max: 33002936 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 127 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 127 - job_id: jp8qm2l8p - job_status: Passed - torchscript_onnx: - inference_time: 1526.0 - throughput: 655.307994757536 - estimated_peak_memory_range: - min: 12288 - max: 30868048 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jg9ly06vg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:44:22Z' - - torchscript_onnx_tflite: - inference_time: 785.0 - throughput: 1273.8853503184714 + inference_time: 787.0 + throughput: 1270.6480304955528 estimated_peak_memory_range: min: 12288 - max: 32244416 + max: 17071840 primary_compute_unit: NPU precision: int8 layer_info: @@ -111,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp14w29lp + job_id: jgjv087xg job_status: Passed torchscript_onnx_qnn: - inference_time: 1002.0 - throughput: 998.003992015968 + inference_time: 999.0 
+ throughput: 1001.001001001001 estimated_peak_memory_range: - min: 12288 - max: 33064448 + min: 16384 + max: 273291200 primary_compute_unit: NPU precision: int8 layer_info: @@ -126,22 +73,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgkeqvjog - job_status: Passed - torchscript_onnx: - inference_time: 1534.0 - throughput: 651.8904823989569 - estimated_peak_memory_range: - min: 12288 - max: 30825576 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp14w2rlp + job_id: jgn69wzr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -150,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:44:24Z' + timestamp: '2024-11-09T23:35:07Z' - torchscript_onnx_tflite: - inference_time: 644.0 - throughput: 1552.7950310559006 + inference_time: 590.0 + throughput: 1694.915254237288 estimated_peak_memory_range: min: 12288 - max: 65625664 + max: 66490720 primary_compute_unit: NPU precision: int8 layer_info: @@ -164,14 +96,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdxqnklp + job_id: jpedrnz15 job_status: Passed torchscript_onnx_qnn: - inference_time: 752.0 - throughput: 1329.787234042553 + inference_time: 779.0 + throughput: 1283.6970474967907 estimated_peak_memory_range: - min: 167936 - max: 17175008 + min: 0 + max: 18957248 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,22 +111,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5q6r0jmp - job_status: Passed - torchscript_onnx: - inference_time: 1102.0 - throughput: 907.4410163339383 - estimated_peak_memory_range: - min: 155648 - max: 97403312 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jgdxqnjlp + job_id: jprv47l9g job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -203,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:44:25Z' + timestamp: '2024-11-09T23:35:09Z' - torchscript_onnx_tflite: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 506.0 + throughput: 1976.2845849802372 estimated_peak_memory_range: min: 12288 - max: 24146272 + max: 23750288 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpy1jz94p + job_id: jgz3x0mk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 721.0 - throughput: 1386.9625520110958 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: min: 0 - max: 17716496 + max: 15808240 primary_compute_unit: NPU precision: int8 layer_info: @@ -232,22 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgz3j2165 - job_status: Passed - torchscript_onnx: - inference_time: 919.0 - throughput: 1088.139281828074 - estimated_peak_memory_range: - min: 0 - max: 40365616 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp4ldnql5 + job_id: jp2k7zr4p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -256,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:44:29Z' + timestamp: '2024-11-09T23:35:11Z' - torchscript_onnx_tflite: - inference_time: 2831.0 - throughput: 353.2320734722713 + inference_time: 2693.0 + throughput: 371.3330857779428 estimated_peak_memory_range: min: 12288 - max: 27161360 + max: 26599584 primary_compute_unit: NPU precision: int8 layer_info: @@ -270,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j57yl2qr5 + job_id: j5wedrl65 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 4021.0 - throughput: 248.69435463814972 + inference_time: 4071.0 + throughput: 245.63989191844755 estimated_peak_memory_range: - min: 12288 - max: 8635136 + min: 450560 + max: 8499344 primary_compute_unit: NPU precision: int8 layer_info: @@ -285,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jglv24jl5 + job_id: jpy14yo7p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -294,13 +196,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:44:06Z' + timestamp: '2024-11-09T23:34:52Z' - torchscript_onnx_tflite: - inference_time: 11742.0 - throughput: 85.16436722875149 + inference_time: 11509.0 + throughput: 86.88852202624034 estimated_peak_memory_range: min: 16384 - max: 6911200 + max: 2214880 primary_compute_unit: NPU precision: int8 layer_info: @@ -308,7 +210,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp4ldnzl5 + job_id: jg9l3qzlg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -317,13 +219,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:43:46Z' + timestamp: '2024-11-09T23:34:34Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 12288 - max: 45439096 + min: 16384 + max: 1323288 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,14 +233,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpxk69w95 + job_id: jp14dmn2p job_status: Passed torchscript_onnx_qnn: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: - min: 188416 - max: 1452480 + min: 176128 + max: 1464256 primary_compute_unit: NPU precision: int8 layer_info: @@ -346,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - 
job_id: j56yz2k7p + job_id: jp0z1xm65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -355,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:44:08Z' + timestamp: '2024-11-09T23:34:54Z' - torchscript_onnx_tflite: - inference_time: 782.0 - throughput: 1278.772378516624 + inference_time: 790.0 + throughput: 1265.8227848101267 estimated_peak_memory_range: - min: 36864 - max: 1416656 + min: 16384 + max: 1579536 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,14 +271,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp14wll2p + job_id: jgdxrmdep job_status: Passed torchscript_onnx_qnn: - inference_time: 948.0 - throughput: 1054.8523206751054 + inference_time: 951.0 + throughput: 1051.5247108307046 estimated_peak_memory_range: - min: 204800 - max: 1640152 + min: 196608 + max: 1422160 primary_compute_unit: NPU precision: int8 layer_info: @@ -384,7 +286,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk6jj15 + job_id: jgkelk22g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -393,13 +295,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:04:31Z' + timestamp: '2024-11-09T23:34:58Z' - torchscript_onnx_tflite: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 785.0 + throughput: 1273.8853503184714 estimated_peak_memory_range: - min: 65536 - max: 55920072 + min: 28672 + max: 1459320 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,14 +309,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdxq99ep + job_id: j57yj8el5 job_status: Passed torchscript_onnx_qnn: - inference_time: 948.0 - throughput: 1054.8523206751054 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: - min: 176128 - max: 1334984 + min: 204800 + max: 1604232 primary_compute_unit: NPU precision: int8 
layer_info: @@ -422,7 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mn622wp + job_id: j5q67dl4p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -431,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:04:32Z' + timestamp: '2024-11-09T23:35:00Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 788.0 + throughput: 1269.0355329949239 estimated_peak_memory_range: - min: 24576 - max: 1450080 + min: 45056 + max: 1325664 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp4ldoov5 + job_id: jp4lx2yv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 957.0 - throughput: 1044.932079414838 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: - min: 180224 - max: 1454496 + min: 176128 + max: 1526696 primary_compute_unit: NPU precision: int8 layer_info: @@ -460,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprv2qq9g + job_id: jglv0qy85 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -469,13 +371,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:04:35Z' + timestamp: '2024-11-09T23:35:02Z' - torchscript_onnx_tflite: - inference_time: 1237.0 - throughput: 808.4074373484236 + inference_time: 1254.0 + throughput: 797.4481658692185 estimated_peak_memory_range: min: 12288 - max: 22546208 + max: 22704384 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,14 +385,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j57ylwwl5 + job_id: jpxk7zl15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1553.0 - throughput: 643.915003219575 + inference_time: 1753.0 + throughput: 570.4506560182544 estimated_peak_memory_range: min: 0 - max: 5959088 + 
max: 6060928 primary_compute_unit: NPU precision: int8 layer_info: @@ -498,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6myyr5 + job_id: j56y3080p job_status: Passed reference_device_info: name: SA8295P ADP @@ -507,13 +409,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:04:34Z' + timestamp: '2024-11-09T23:35:04Z' - torchscript_onnx_tflite: - inference_time: 915.0 - throughput: 1092.896174863388 + inference_time: 906.0 + throughput: 1103.7527593818984 estimated_peak_memory_range: - min: 16384 - max: 66678192 + min: 12288 + max: 66622640 primary_compute_unit: NPU precision: int8 layer_info: @@ -521,14 +423,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp2k9x2mp + job_id: j5mnwl0wp job_status: Passed torchscript_onnx_qnn: - inference_time: 1159.0 - throughput: 862.8127696289905 + inference_time: 1139.0 + throughput: 877.9631255487269 estimated_peak_memory_range: - min: 0 - max: 18170832 + min: 172032 + max: 19555088 primary_compute_unit: NPU precision: int8 layer_info: @@ -536,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpedwoj05 + job_id: jp3j4rzlg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -545,13 +447,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:44:17Z' + timestamp: '2024-11-09T23:35:06Z' - torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 548864 - max: 548864 + min: 425984 + max: 425984 primary_compute_unit: NPU precision: int8 layer_info: @@ -559,22 +461,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp3j1nyzg - job_status: Passed - torchscript_onnx: - inference_time: 1575.0 - throughput: 634.9206349206349 - estimated_peak_memory_range: - min: 30523392 - max: 30523392 - 
primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: j57yl2zr5 + job_id: jp8q3kexp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -583,4 +470,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:44:27Z' + timestamp: '2024-11-09T23:35:13Z' diff --git a/qai_hub_models/models/resnext101/README.md b/qai_hub_models/models/resnext101/README.md index a23499fd..d4b86ca2 100644 --- a/qai_hub_models/models/resnext101/README.md +++ b/qai_hub_models/models/resnext101/README.md @@ -5,8 +5,7 @@ ResNeXt101 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNeXt101 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext101). 
diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 283e4dde..aa73b173 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6563.0 - throughput: 152.36934328813044 + inference_time: 6548.0 + throughput: 152.71838729383018 estimated_peak_memory_range: - min: 28672 - max: 2456144 + min: 53248 + max: 2295568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp8q09oxp + job_id: jpxk74rl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6759.0 - throughput: 147.95088030773783 + inference_time: 6647.0 + throughput: 150.4438092372499 estimated_peak_memory_range: min: 12288 - max: 39355272 + max: 35155112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,67 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpede1915 + job_id: jglv0xqm5 job_status: Passed torchscript_onnx: - inference_time: 7095.0 - throughput: 140.94432699083862 - estimated_peak_memory_range: - min: 16384 - max: 203610696 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 247 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 247 - job_id: jgn6lyyr5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:26:06Z' - - torchscript_onnx_tflite: - inference_time: 6725.0 - throughput: 148.6988847583643 - 
estimated_peak_memory_range: - min: 24576 - max: 2075648 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jgke7nz2g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 6812.0 - throughput: 146.7997651203758 + inference_time: 7106.0 + throughput: 140.72614691809738 estimated_peak_memory_range: min: 12288 - max: 33594416 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 245 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 245 - job_id: jgz3o9ek5 - job_status: Passed - torchscript_onnx: - inference_time: 7025.0 - throughput: 142.34875444839858 - estimated_peak_memory_range: - min: 667648 - max: 3728936 + max: 203554736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jprv8qq9g + job_id: jgdxr3mzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -145,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:26:07Z' + timestamp: '2024-11-09T22:15:30Z' - torchscript_onnx_tflite: - inference_time: 5575.0 - throughput: 179.37219730941703 + inference_time: 4628.0 + throughput: 216.076058772688 estimated_peak_memory_range: min: 20480 - max: 388067360 + max: 388998768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5q6ek84p + job_id: jgn69nqq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5470.0 - throughput: 182.81535648994515 + inference_time: 4681.0 + throughput: 213.62956633198036 estimated_peak_memory_range: - min: 626688 - max: 94268496 + min: 0 + max: 95462080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5we2vv65 + job_id: j56y370yp job_status: 
Passed torchscript_onnx: - inference_time: 5807.0 - throughput: 172.20595832615808 + inference_time: 5195.0 + throughput: 192.49278152069297 estimated_peak_memory_range: - min: 0 - max: 392333360 + min: 634880 + max: 393980944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jp2k0664p + job_id: j57yj4895 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:26:08Z' + timestamp: '2024-11-09T22:15:31Z' - torchscript_onnx_tflite: - inference_time: 4622.0 - throughput: 216.3565556036348 + inference_time: 4583.0 + throughput: 218.1976871045167 estimated_peak_memory_range: min: 12288 - max: 164460544 + max: 164974640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgjvzmxxg + job_id: jprv40d7g job_status: Passed torchscript_onnx_qnn: - inference_time: 4722.0 - throughput: 211.7746717492588 + inference_time: 4827.0 + throughput: 207.16801325875284 estimated_peak_memory_range: min: 0 - max: 100739696 + max: 100697984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5mnq22wp + job_id: jgo21r9kp job_status: Passed torchscript_onnx: - inference_time: 4421.0 - throughput: 226.1931689662972 + inference_time: 5001.0 + throughput: 199.96000799840033 estimated_peak_memory_range: - min: 647168 - max: 167358096 + min: 479232 + max: 166429968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jp0z3qq65 + job_id: jp4lx1215 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
Snapdragon® 8 Elite - timestamp: '2024-10-26T23:26:11Z' + timestamp: '2024-11-09T22:15:32Z' - torchscript_onnx_tflite: - inference_time: 6623.0 - throughput: 150.98897780462028 + inference_time: 6483.0 + throughput: 154.24957581366652 estimated_peak_memory_range: - min: 24576 - max: 2477840 + min: 20480 + max: 2262824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jglv6zn85 + job_id: jp2k7wdqp job_status: Passed torchscript_onnx_qnn: - inference_time: 6751.0 - throughput: 148.12620352540364 + inference_time: 6776.0 + throughput: 147.5796930342385 estimated_peak_memory_range: - min: 643072 - max: 1894080 + min: 638976 + max: 1911528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jg9lj11lg + job_id: jpv61dnr5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:25:57Z' + timestamp: '2024-11-09T22:15:22Z' - torchscript_onnx_tflite: - inference_time: 6547.0 - throughput: 152.7417137620284 + inference_time: 6609.0 + throughput: 151.30882130428205 estimated_peak_memory_range: - min: 40960 - max: 2640512 + min: 32768 + max: 2624336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jg9lyvmqg + job_id: jpy14x2lp job_status: Passed torchscript_onnx_qnn: - inference_time: 6876.0 - throughput: 145.43339150668993 + inference_time: 6845.0 + throughput: 146.09203798392988 estimated_peak_memory_range: min: 634880 - max: 2000208 + max: 1724200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5we3o7j5 + job_id: jpedrznv5 job_status: Passed reference_device_info: name: 
SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:18:20Z' + timestamp: '2024-11-09T22:15:24Z' - torchscript_onnx_tflite: - inference_time: 6561.0 - throughput: 152.41579027587258 + inference_time: 6491.0 + throughput: 154.05946695424433 estimated_peak_memory_range: - min: 3706880 - max: 5386160 + min: 36864 + max: 2289984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp14w0jkp + job_id: jp0z1j9n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6835.0 - throughput: 146.3057790782736 + inference_time: 6884.0 + throughput: 145.2643811737362 estimated_peak_memory_range: - min: 692224 - max: 2059560 + min: 630784 + max: 1837952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jg9lyvmvg + job_id: jgz3xm0x5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:18:21Z' + timestamp: '2024-11-09T22:15:25Z' - torchscript_onnx_tflite: - inference_time: 6537.0 - throughput: 152.97537096527458 + inference_time: 6823.0 + throughput: 146.5630954125751 estimated_peak_memory_range: - min: 32768 - max: 2258776 + min: 53248 + max: 2287688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgdxqw3kp + job_id: jp8q3xrop job_status: Passed torchscript_onnx_qnn: - inference_time: 6766.0 - throughput: 147.79781259237365 + inference_time: 6784.0 + throughput: 147.4056603773585 estimated_peak_memory_range: - min: 638976 - max: 2128592 + min: 643072 + max: 2033976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 245 - job_id: jp14w0jlp + job_id: j5wed7rm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:18:22Z' + timestamp: '2024-11-09T22:15:26Z' - torchscript_onnx_tflite: - inference_time: 9194.0 - throughput: 108.76658690450293 + inference_time: 10687.0 + throughput: 93.57162908206232 estimated_peak_memory_range: - min: 16384 - max: 170548512 + min: 36864 + max: 106381088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +366,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpv60o3j5 + job_id: jgkel40ng job_status: Passed torchscript_onnx_qnn: - inference_time: 9363.0 - throughput: 106.80337498664957 + inference_time: 10828.0 + throughput: 92.35315847801995 estimated_peak_memory_range: - min: 638976 - max: 55315728 + min: 667648 + max: 6554480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 245 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 245 + job_id: jg9l3mq8g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:15:27Z' + - torchscript_onnx_tflite: + inference_time: 9218.0 + throughput: 108.48340203948796 + estimated_peak_memory_range: + min: 73728 + max: 171905792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: j5q67y1op + job_status: Passed + torchscript_onnx_qnn: + inference_time: 9281.0 + throughput: 107.74701002047193 + estimated_peak_memory_range: + min: 622592 + max: 54109280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpxknjj15 + job_id: jp14djm7p job_status: Passed reference_device_info: name: QCS8450 
(Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:26:04Z' + timestamp: '2024-11-09T22:15:29Z' - torchscript_onnx_qnn: - inference_time: 6878.0 - throughput: 145.39110206455365 + inference_time: 6907.0 + throughput: 144.78065730418416 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp14yll2p + job_id: jgjv078eg job_status: Passed torchscript_onnx: - inference_time: 6813.0 - throughput: 146.7782181124321 + inference_time: 6817.0 + throughput: 146.69209329617135 estimated_peak_memory_range: - min: 181293056 - max: 181293056 + min: 181190656 + max: 181190656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jpy1rww7p + job_id: jpxk74zl5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:26:09Z' + timestamp: '2024-11-09T22:15:33Z' diff --git a/qai_hub_models/models/resnext101_quantized/README.md b/qai_hub_models/models/resnext101_quantized/README.md index 6ed9ae1b..658fa1f1 100644 --- a/qai_hub_models/models/resnext101_quantized/README.md +++ b/qai_hub_models/models/resnext101_quantized/README.md @@ -5,8 +5,7 @@ ResNeXt101 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNeXt101Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext101_quantized). diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index 79425c28..be1e2ed8 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2909.0 - throughput: 343.7607425232039 + inference_time: 2829.0 + throughput: 353.4817956875221 estimated_peak_memory_range: min: 16384 - max: 2096016 + max: 1925968 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,67 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpv6rqy75 + job_id: j5q67dv4p job_status: Passed torchscript_onnx_qnn: - inference_time: 3126.0 - throughput: 319.8976327575176 - estimated_peak_memory_range: - min: 32768 - max: 30631392 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 246 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 246 - job_id: jgn6m04v5 - job_status: Passed - torchscript_onnx: - inference_time: 3882.0 - throughput: 257.5991756826378 - estimated_peak_memory_range: - min: 12288 - max: 102774632 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jpv6rq875 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:43:11Z' - - torchscript_onnx_tflite: - inference_time: 3006.0 - throughput: 332.667997338656 + inference_time: 3043.0 + throughput: 328.62306933946763 estimated_peak_memory_range: min: 12288 - max: 1933016 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 150 - 
layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 150 - job_id: jgjv2d67g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3160.0 - throughput: 316.45569620253167 - estimated_peak_memory_range: - min: 12288 - max: 33663464 + max: 32373744 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jprv26rvg - job_status: Passed - torchscript_onnx: - inference_time: 3807.0 - throughput: 262.67402153926975 - estimated_peak_memory_range: - min: 12288 - max: 103635496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jgjv2d97g + job_id: jp14dmj2p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:43:12Z' + timestamp: '2024-11-09T23:33:57Z' - torchscript_onnx_tflite: - inference_time: 2056.0 - throughput: 486.38132295719845 + inference_time: 2096.0 + throughput: 477.09923664122135 estimated_peak_memory_range: - min: 16384 - max: 286953408 + min: 12288 + max: 289997696 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpedwo075 + job_id: jglv0ql85 job_status: Passed torchscript_onnx_qnn: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2332.0 + throughput: 428.8164665523156 estimated_peak_memory_range: min: 12288 - max: 96604784 + max: 98694800 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp2k9x1xp - job_status: Passed - torchscript_onnx: - inference_time: 3261.0 - throughput: 306.65440049064705 - estimated_peak_memory_range: - min: 0 - max: 351604496 - primary_compute_unit: NPU - precision: int8 - layer_info: - 
layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jpedwoq75 + job_id: jgdxrm3ep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:43:14Z' + timestamp: '2024-11-09T23:33:59Z' - torchscript_onnx_tflite: - inference_time: 1869.0 - throughput: 535.0454788657036 + inference_time: 2083.0 + throughput: 480.07681228996637 estimated_peak_memory_range: - min: 8192 - max: 198807728 + min: 12288 + max: 199150480 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpxk698j5 + job_id: j56y30w0p job_status: Passed torchscript_onnx_qnn: - inference_time: 2311.0 - throughput: 432.7131112072696 + inference_time: 1979.0 + throughput: 505.3057099545225 estimated_peak_memory_range: min: 0 - max: 96291856 + max: 94812560 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp3j1ndxg - job_status: Passed - torchscript_onnx: - inference_time: 2879.0 - throughput: 347.34282737061477 - estimated_peak_memory_range: - min: 0 - max: 237165504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: j5we3wkz5 + job_id: j57yj84l5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:43:17Z' + timestamp: '2024-11-09T23:34:00Z' - torchscript_onnx_tflite: - inference_time: 9967.0 - throughput: 100.33109260559847 + inference_time: 9960.0 + throughput: 100.40160642570281 estimated_peak_memory_range: - min: 36864 - max: 209085504 + min: 106496 + max: 208658992 primary_compute_unit: NPU precision: int8 
layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgz3j2qz5 + job_id: jp3j4r6lg job_status: Passed torchscript_onnx_qnn: - inference_time: 14964.0 - throughput: 66.82705159048383 + inference_time: 14636.0 + throughput: 68.3246788740093 estimated_peak_memory_range: - min: 204800 - max: 8490464 + min: 221184 + max: 8490400 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpy1jzlrp + job_id: jp4lx21v5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:42:54Z' + timestamp: '2024-11-09T23:33:42Z' - torchscript_onnx_tflite: - inference_time: 114073.0 - throughput: 8.766316306224962 + inference_time: 133943.0 + throughput: 7.465862344430094 estimated_peak_memory_range: min: 28672 - max: 350257144 + max: 546318744 primary_compute_unit: GPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 150 - job_id: j5we3w0z5 + job_id: jgo2198xp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:42:36Z' + timestamp: '2024-11-09T23:33:25Z' - torchscript_onnx_tflite: - inference_time: 2764.0 - throughput: 361.794500723589 + inference_time: 2899.0 + throughput: 344.94653328734046 estimated_peak_memory_range: - min: 20480 - max: 2228664 + min: 16384 + max: 2365728 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jg9ly07qg + job_id: jpv61n7j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2939.0 - throughput: 340.25178632187817 + inference_time: 2937.0 + throughput: 340.4834865509023 estimated_peak_memory_range: - min: 
180224 - max: 1473656 + min: 172032 + max: 1420544 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp0z24w25 + job_id: jpxk7z415 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:42:56Z' + timestamp: '2024-11-09T23:33:44Z' - torchscript_onnx_tflite: - inference_time: 2840.0 - throughput: 352.11267605633805 + inference_time: 2865.0 + throughput: 349.04013961605585 estimated_peak_memory_range: - min: 20480 - max: 2212504 + min: 24576 + max: 1859960 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j5q6rk84p + job_id: jgjv08qxg job_status: Passed torchscript_onnx_qnn: - inference_time: 2951.0 - throughput: 338.8681802778719 + inference_time: 2928.0 + throughput: 341.53005464480873 estimated_peak_memory_range: - min: 176128 - max: 1884808 + min: 217088 + max: 1788224 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgo2n0yxp + job_id: jgn69wnr5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:04:00Z' + timestamp: '2024-11-09T23:33:48Z' - torchscript_onnx_tflite: - inference_time: 2816.0 - throughput: 355.1136363636364 + inference_time: 2845.0 + throughput: 351.493848857645 estimated_peak_memory_range: - min: 28672 - max: 2219592 + min: 16384 + max: 2153064 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jglv2zn85 + job_id: jpedrny15 job_status: Passed torchscript_onnx_qnn: - inference_time: 2929.0 - throughput: 
341.4134516899966 + inference_time: 2930.0 + throughput: 341.29692832764505 estimated_peak_memory_range: - min: 196608 - max: 1396512 + min: 208896 + max: 1367192 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpv6ro3j5 + job_id: jprv4709g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:04:02Z' + timestamp: '2024-11-09T23:33:50Z' - torchscript_onnx_tflite: - inference_time: 2892.0 - throughput: 345.78146611341634 + inference_time: 2870.0 + throughput: 348.4320557491289 estimated_peak_memory_range: - min: 28672 - max: 2778944 + min: 24576 + max: 5205992 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp3j13klg + job_id: jgz3x0nk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2957.0 - throughput: 338.18058843422386 + inference_time: 3007.0 + throughput: 332.5573661456601 estimated_peak_memory_range: - min: 200704 - max: 1639600 + min: 172032 + max: 1411832 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpedw1915 + job_id: jp2k7zw4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:04:05Z' + timestamp: '2024-11-09T23:33:51Z' - torchscript_onnx_tflite: - inference_time: 4041.0 - throughput: 247.46349913387775 + inference_time: 4061.0 + throughput: 246.2447672986949 estimated_peak_memory_range: - min: 16384 - max: 199162400 + min: 12288 + max: 199889792 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j56yzj60p 
+ job_id: j5wedr765 job_status: Passed torchscript_onnx_qnn: - inference_time: 4262.0 - throughput: 234.6316283435007 + inference_time: 4816.0 + throughput: 207.64119601328903 estimated_peak_memory_range: - min: 163840 - max: 6034224 + min: 200704 + max: 6052752 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgjv2mxxg + job_id: jpy14yx7p job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:04:04Z' + timestamp: '2024-11-09T23:33:53Z' - torchscript_onnx_tflite: - inference_time: 3394.0 - throughput: 294.6375957572186 + inference_time: 3391.0 + throughput: 294.8982601002654 estimated_peak_memory_range: - min: 12288 - max: 291472336 + min: 16384 + max: 291008352 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp4ldn6q5 + job_id: jg9l3qmlg job_status: Passed torchscript_onnx_qnn: - inference_time: 3757.0 - throughput: 266.1698163428267 + inference_time: 3622.0 + throughput: 276.09055770292656 estimated_peak_memory_range: - min: 12288 - max: 98328992 + min: 28672 + max: 100561984 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j56yz2xvp + job_id: jp0z1xj65 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:43:05Z' + timestamp: '2024-11-09T23:33:55Z' - torchscript_onnx_qnn: - inference_time: 3137.0 - throughput: 318.77590054191904 + inference_time: 3087.0 + throughput: 323.9390994493035 estimated_peak_memory_range: - min: 217088 - max: 217088 + min: 188416 + max: 188416 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 
+464,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp8qm2nzp - job_status: Passed - torchscript_onnx: - inference_time: 4241.0 - throughput: 235.7934449422306 - estimated_peak_memory_range: - min: 94556160 - max: 94556160 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 283 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 283 - job_id: jgz3j26z5 + job_id: j5mnwlmwp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:43:15Z' + timestamp: '2024-11-09T23:34:02Z' diff --git a/qai_hub_models/models/resnext50/README.md b/qai_hub_models/models/resnext50/README.md index 2c206d02..f13fff9e 100644 --- a/qai_hub_models/models/resnext50/README.md +++ b/qai_hub_models/models/resnext50/README.md @@ -5,8 +5,7 @@ ResNeXt50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNeXt50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext50). 
diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 61d4acf2..e693ba50 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2525.0 - throughput: 396.03960396039605 + inference_time: 2485.0 + throughput: 402.4144869215292 estimated_peak_memory_range: min: 16384 - max: 2353688 + max: 2316840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jprv8qz9g + job_id: jg9l3mo8g job_status: Passed torchscript_onnx_qnn: - inference_time: 2577.0 - throughput: 388.04811796662784 + inference_time: 2589.0 + throughput: 386.24951718810354 estimated_peak_memory_range: min: 618496 - max: 73535992 + max: 73558280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp3jv3ylg + job_id: jp2k7woqp job_status: Passed torchscript_onnx: - inference_time: 2757.0 - throughput: 362.71309394269133 + inference_time: 2732.0 + throughput: 366.03221083455344 estimated_peak_memory_range: - min: 425984 - max: 2483208 + min: 581632 + max: 2689296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j57y0wzl5 + job_id: jpv61dmr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:25:16Z' + timestamp: '2024-11-09T22:14:40Z' - 
torchscript_onnx_tflite: - inference_time: 2549.0 - throughput: 392.31071008238524 + inference_time: 1762.0 + throughput: 567.5368898978434 estimated_peak_memory_range: min: 16384 - max: 2269152 + max: 183395824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp2k0624p + job_id: jp14djo7p job_status: Passed torchscript_onnx_qnn: - inference_time: 2597.0 - throughput: 385.0596842510589 + inference_time: 1813.0 + throughput: 551.5719801434087 estimated_peak_memory_range: - min: 626688 - max: 42343680 + min: 0 + max: 39564384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2k0jxp + job_id: jpy14x8lp job_status: Passed torchscript_onnx: - inference_time: 2759.0 - throughput: 362.4501631025734 + inference_time: 1974.0 + throughput: 506.5856129685917 estimated_peak_memory_range: - min: 385024 - max: 2409584 + min: 0 + max: 185886320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp4lkoqv5 + job_id: jgjv07yeg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:25:17Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:14:41Z' - torchscript_onnx_tflite: - inference_time: 1965.0 - throughput: 508.9058524173028 + inference_time: 1678.0 + throughput: 595.9475566150179 estimated_peak_memory_range: - min: 16384 - max: 183848032 + min: 12288 + max: 63037744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpy1rw97p + job_id: jgdxr36zp job_status: Passed torchscript_onnx_qnn: - inference_time: 2042.0 - 
throughput: 489.71596474045054 + inference_time: 1742.0 + throughput: 574.052812858783 estimated_peak_memory_range: - min: 647168 - max: 37415312 + min: 0 + max: 39745920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv60ojj5 + job_id: jp0z1jon5 job_status: Passed torchscript_onnx: - inference_time: 2185.0 - throughput: 457.66590389016017 + inference_time: 1946.0 + throughput: 513.874614594039 estimated_peak_memory_range: min: 0 - max: 187026240 + max: 65332880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jpxknjv15 + job_id: jpedrzxv5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:25:18Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:14:42Z' - torchscript_onnx_tflite: - inference_time: 1450.0 - throughput: 689.6551724137931 + inference_time: 2474.0 + throughput: 404.2037186742118 estimated_peak_memory_range: - min: 12288 - max: 63831392 + min: 49152 + max: 3513016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j56yejk0p + job_id: j57yj4o95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1508.0 - throughput: 663.1299734748011 + inference_time: 2489.0 + throughput: 401.76777822418643 estimated_peak_memory_range: - min: 614400 - max: 41039840 + min: 630784 + max: 1783608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdxe9wep - job_status: Passed - torchscript_onnx: - inference_time: 1942.0 - throughput: 514.9330587023687 - 
estimated_peak_memory_range: - min: 0 - max: 65323200 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: jgn6ly2r5 + job_id: jp8q3xjop job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:25:20Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:14:32Z' - torchscript_onnx_tflite: - inference_time: 2486.0 - throughput: 402.2526146419952 + inference_time: 2498.0 + throughput: 400.320256204964 estimated_peak_memory_range: min: 16384 - max: 1738960 + max: 2251760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp0z3qn65 + job_id: jp4lx1e15 job_status: Passed torchscript_onnx_qnn: - inference_time: 2484.0 - throughput: 402.5764895330113 + inference_time: 2514.0 + throughput: 397.77247414478916 estimated_peak_memory_range: - min: 634880 - max: 1805992 + min: 651264 + max: 2120720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgjvzmjxg + job_id: j5q67y4op job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:25:09Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:14:34Z' - torchscript_onnx_tflite: - inference_time: 2542.0 - throughput: 393.3910306845004 + inference_time: 2510.0 + throughput: 398.40637450199205 estimated_peak_memory_range: - min: 20480 - max: 1776000 + min: 16384 + max: 2443448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jglv2nxe5 + job_id: jpxk740l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2533.0 - throughput: 394.78878799842084 + inference_time: 2486.0 + throughput: 402.2526146419952 estimated_peak_memory_range: - min: 618496 - max: 2183032 + min: 634880 + max: 1881088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2nyr4p + job_id: jglv0x8m5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:17:53Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:14:35Z' - torchscript_onnx_tflite: - inference_time: 2599.0 - throughput: 384.7633705271258 + inference_time: 2520.0 + throughput: 396.8253968253968 estimated_peak_memory_range: - min: 12288 - max: 2209424 + min: 16384 + max: 2099904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j56yz67vp + job_id: j5mnwm99p job_status: Passed torchscript_onnx_qnn: - inference_time: 2523.0 - throughput: 396.3535473642489 + inference_time: 2502.0 + throughput: 399.68025579536373 estimated_peak_memory_range: - min: 626688 - max: 1975016 + min: 638976 + max: 1901536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv6r3d75 + job_id: j56y37myp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:17:54Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:14:36Z' - torchscript_onnx_tflite: - inference_time: 2514.0 - throughput: 397.77247414478916 + inference_time: 3986.0 + 
throughput: 250.87807325639739 estimated_peak_memory_range: min: 16384 - max: 2495832 + max: 43101584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp3j1k9xg + job_id: jgn69n1q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2566.0 - throughput: 389.7116134060795 + inference_time: 4299.0 + throughput: 232.61223540358222 estimated_peak_memory_range: - min: 12288 - max: 1267384 + min: 0 + max: 5646992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgjv2x77g + job_id: jp3j497ng job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:17:55Z' + chipset: SA8295P + timestamp: '2024-11-09T22:14:37Z' - torchscript_onnx_tflite: - inference_time: 3251.0 - throughput: 307.59766225776684 + inference_time: 3253.0 + throughput: 307.40854595757764 estimated_peak_memory_range: min: 16384 - max: 118560384 + max: 117000032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jglv6zj85 + job_id: jprv40x7g job_status: Passed torchscript_onnx_qnn: - inference_time: 3423.0 - throughput: 292.141396435875 + inference_time: 3376.0 + throughput: 296.2085308056872 estimated_peak_memory_range: min: 618496 - max: 28921600 + max: 26792912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14yl02p + job_id: jgo21rwkp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:25:14Z' + timestamp: '2024-11-09T22:14:39Z' - 
torchscript_onnx_qnn: - inference_time: 2645.0 - throughput: 378.0718336483932 + inference_time: 2684.0 + throughput: 372.5782414307005 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpede1j15 + job_id: jgkel46ng job_status: Passed torchscript_onnx: - inference_time: 2666.0 - throughput: 375.0937734433608 + inference_time: 2710.0 + throughput: 369.00369003690037 estimated_peak_memory_range: - min: 54370304 - max: 54370304 + min: 53198848 + max: 53198848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5mnq2rwp + job_id: jgz3xmyx5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:25:19Z' + timestamp: '2024-11-09T22:14:43Z' diff --git a/qai_hub_models/models/resnext50_quantized/README.md b/qai_hub_models/models/resnext50_quantized/README.md index d03bd9ef..0b85050b 100644 --- a/qai_hub_models/models/resnext50_quantized/README.md +++ b/qai_hub_models/models/resnext50_quantized/README.md @@ -5,8 +5,7 @@ ResNeXt50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of ResNeXt50Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/resnext50_quantized). 
diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index f1fcb2fe..0816087d 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 947.0 - throughput: 1055.9662090813094 + inference_time: 928.0 + throughput: 1077.5862068965516 estimated_peak_memory_range: - min: 16384 - max: 1414056 + min: 24576 + max: 317915520 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,67 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpy1jzyrp + job_id: jprv47n9g job_status: Passed torchscript_onnx_qnn: - inference_time: 1181.0 - throughput: 846.740050804403 - estimated_peak_memory_range: - min: 12288 - max: 10887032 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 127 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 127 - job_id: jgz3j20z5 - job_status: Passed - torchscript_onnx: - inference_time: 1807.0 - throughput: 553.4034311012729 - estimated_peak_memory_range: - min: 12288 - max: 31435424 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jpy1jzkrp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:41:54Z' - - torchscript_onnx_tflite: - inference_time: 951.0 - throughput: 1051.5247108307046 - estimated_peak_memory_range: - min: 12288 - max: 2754480 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 82 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 82 - job_id: jp0z24x25 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1183.0 - throughput: 845.30853761623 + 
inference_time: 1188.0 + throughput: 841.7508417508418 estimated_peak_memory_range: min: 16384 - max: 65525216 + max: 16018264 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5we3wrz5 - job_status: Passed - torchscript_onnx: - inference_time: 1842.0 - throughput: 542.8881650380022 - estimated_peak_memory_range: - min: 12288 - max: 31509584 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp0z24825 + job_id: jpv61n9j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:41:55Z' + timestamp: '2024-11-09T23:32:42Z' - torchscript_onnx_tflite: - inference_time: 693.0 - throughput: 1443.001443001443 + inference_time: 686.0 + throughput: 1457.725947521866 estimated_peak_memory_range: min: 16384 - max: 110449856 + max: 109489552 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp8qm2kzp + job_id: jp2k7zv4p job_status: Passed torchscript_onnx_qnn: - inference_time: 883.0 - throughput: 1132.5028312570782 + inference_time: 889.0 + throughput: 1124.859392575928 estimated_peak_memory_range: min: 167936 - max: 33953488 + max: 39136672 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jg9ly0qqg - job_status: Passed - torchscript_onnx: - inference_time: 1307.0 - throughput: 765.1109410864575 - estimated_peak_memory_range: - min: 28672 - max: 146183632 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp8qm2dzp + job_id: jgjv08wxg job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:41:57Z' + timestamp: '2024-11-09T23:32:44Z' - torchscript_onnx_tflite: - inference_time: 580.0 - throughput: 1724.1379310344828 + inference_time: 582.0 + throughput: 1718.213058419244 estimated_peak_memory_range: min: 8192 - max: 56134288 + max: 56301200 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgjv2d87g + job_id: jpy14y77p job_status: Passed torchscript_onnx_qnn: - inference_time: 845.0 - throughput: 1183.4319526627219 + inference_time: 870.0 + throughput: 1149.4252873563219 estimated_peak_memory_range: - min: 159744 - max: 33149376 + min: 0 + max: 33575264 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprv26mvg - job_status: Passed - torchscript_onnx: - inference_time: 1074.0 - throughput: 931.0986964618249 - estimated_peak_memory_range: - min: 0 - max: 79696144 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: j5q6r0x7p + job_id: jpedrnl15 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:42:01Z' + timestamp: '2024-11-09T23:32:45Z' - torchscript_onnx_tflite: - inference_time: 3127.0 - throughput: 319.79533098816756 + inference_time: 3098.0 + throughput: 322.7888960619755 estimated_peak_memory_range: min: 12288 - max: 59945504 + max: 58954240 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgkeqvkyg + job_id: jp0z1xv65 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 4527.0 - throughput: 220.8968411751712 + inference_time: 4771.0 + throughput: 209.5996646405366 estimated_peak_memory_range: - min: 200704 - max: 8289152 + min: 163840 + max: 8411952 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp14w2mkp + job_id: jgz3x04k5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:41:38Z' + timestamp: '2024-11-09T23:32:27Z' - torchscript_onnx_tflite: - inference_time: 62042.0 - throughput: 16.118113535991746 + inference_time: 60540.0 + throughput: 16.518004625041296 estimated_peak_memory_range: - min: 1949696 - max: 19294416 + min: 12288 + max: 135536760 primary_compute_unit: GPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 82 - job_id: j5q6r0d7p + job_id: jp8q3k4xp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:41:19Z' + timestamp: '2024-11-09T23:32:10Z' - torchscript_onnx_tflite: - inference_time: 916.0 - throughput: 1091.703056768559 + inference_time: 910.0 + throughput: 1098.901098901099 estimated_peak_memory_range: - min: 16384 - max: 4416872 + min: 32768 + max: 12155744 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jglv24qe5 + job_id: jgkelk92g job_status: Passed torchscript_onnx_qnn: - inference_time: 1127.0 - throughput: 887.3114463176574 + inference_time: 1133.0 + throughput: 882.61253309797 estimated_peak_memory_range: min: 184320 - max: 1366616 + max: 1634328 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 127 - job_id: jgdxqnmkp + job_id: j5wedr465 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:41:40Z' + timestamp: '2024-11-09T23:32:29Z' - torchscript_onnx_tflite: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: min: 16384 - max: 14476408 + max: 2711360 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp14wl02p + job_id: j5q67dm4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1142.0 - throughput: 875.6567425569177 + inference_time: 1149.0 + throughput: 870.3220191470845 estimated_peak_memory_range: min: 180224 - max: 1357872 + max: 1557472 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mn62rwp + job_id: jp14dm62p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:03:18Z' + timestamp: '2024-11-09T23:32:33Z' - torchscript_onnx_tflite: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 937.0 + throughput: 1067.2358591248667 estimated_peak_memory_range: min: 12288 - max: 110493984 + max: 1398424 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdxq9wep + job_id: jglv0q185 job_status: Passed torchscript_onnx_qnn: - inference_time: 1149.0 - throughput: 870.3220191470845 + inference_time: 1144.0 + throughput: 874.1258741258741 estimated_peak_memory_range: min: 16384 - max: 1618104 + max: 1354608 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 
+327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6my2r5 + job_id: jgdxrm2ep job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:03:20Z' + timestamp: '2024-11-09T23:32:34Z' - torchscript_onnx_tflite: - inference_time: 935.0 - throughput: 1069.51871657754 + inference_time: 939.0 + throughput: 1064.9627263045793 estimated_peak_memory_range: min: 12288 - max: 7420208 + max: 1595144 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpxk6jv15 + job_id: j56y30d0p job_status: Passed torchscript_onnx_qnn: - inference_time: 1165.0 - throughput: 858.3690987124463 + inference_time: 1142.0 + throughput: 875.6567425569177 estimated_peak_memory_range: - min: 172032 - max: 1597112 + min: 212992 + max: 1532112 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp2k9684p + job_id: j57yj89l5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:03:23Z' + timestamp: '2024-11-09T23:32:36Z' - torchscript_onnx_tflite: - inference_time: 1491.0 - throughput: 670.690811535882 + inference_time: 1484.0 + throughput: 673.8544474393531 estimated_peak_memory_range: min: 12288 - max: 54535232 + max: 54440064 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j57ylwzl5 + job_id: jp3j4rwlg job_status: Passed torchscript_onnx_qnn: - inference_time: 1819.0 - throughput: 549.7526113249038 + inference_time: 1915.0 + throughput: 522.1932114882507 estimated_peak_memory_range: min: 0 - max: 5925504 + max: 5851968 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprv2qk9g + job_id: jp4lx23v5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:03:22Z' + timestamp: '2024-11-09T23:32:38Z' - torchscript_onnx_tflite: - inference_time: 1082.0 - throughput: 924.2144177449168 + inference_time: 1109.0 + throughput: 901.7132551848512 estimated_peak_memory_range: - min: 20480 - max: 110620864 + min: 4096 + max: 111190000 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpv6rqn75 + job_id: jgo2194xp job_status: Passed torchscript_onnx_qnn: - inference_time: 1416.0 - throughput: 706.2146892655368 + inference_time: 1380.0 + throughput: 724.6376811594203 estimated_peak_memory_range: - min: 0 - max: 37713600 + min: 167936 + max: 36663376 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6m0dv5 + job_id: jpxk7zx15 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:41:49Z' + timestamp: '2024-11-09T23:32:40Z' - torchscript_onnx_qnn: - inference_time: 1267.0 - throughput: 789.2659826361484 + inference_time: 1262.0 + throughput: 792.3930269413629 estimated_peak_memory_range: - min: 438272 - max: 438272 + min: 430080 + max: 430080 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j57yl28q5 - job_status: Passed - torchscript_onnx: - inference_time: 1955.0 - throughput: 511.5089514066496 - estimated_peak_memory_range: - min: 29859840 - max: 29859840 - primary_compute_unit: NPU - precision: int8 - 
layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jgkeqvwyg + job_id: jg9l3qdlg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:41:59Z' + timestamp: '2024-11-09T23:32:47Z' diff --git a/qai_hub_models/models/riffusion_quantized/README.md b/qai_hub_models/models/riffusion_quantized/README.md index d889deb9..3c1edcfc 100644 --- a/qai_hub_models/models/riffusion_quantized/README.md +++ b/qai_hub_models/models/riffusion_quantized/README.md @@ -5,8 +5,7 @@ Generates high resolution spectrograms images from text prompts using a latent diffusion model. This model uses CLIP ViT-L/14 as text encoder, U-Net based latent denoising, and VAE based decoder to generate the final image. -This is based on the implementation of Riffusion found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/riffusion_quantized). 
diff --git a/qai_hub_models/models/riffusion_quantized/requirements.txt b/qai_hub_models/models/riffusion_quantized/requirements.txt index 30d9a5e4..047c5390 100644 --- a/qai_hub_models/models/riffusion_quantized/requirements.txt +++ b/qai_hub_models/models/riffusion_quantized/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -diffusers[torch]==0.21.4 +diffusers[torch]==0.31.0 diff --git a/qai_hub_models/models/sam/README.md b/qai_hub_models/models/sam/README.md index ac8adc3b..473460ac 100644 --- a/qai_hub_models/models/sam/README.md +++ b/qai_hub_models/models/sam/README.md @@ -5,8 +5,7 @@ Transformer based encoder-decoder where prompts specify what to segment in an image thereby allowing segmentation without the need for additional training. The image encoder generates embeddings and the lightweight decoder operates on the embeddings for point and mask based image segmentation. -This is based on the implementation of Segment-Anything-Model found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sam). 
diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index c8f1c5c7..acc7727d 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -252,7 +252,6 @@ def main(): components=ALL_COMPONENTS, supports_qnn=False, supports_onnx=False, - supports_precompiled_qnn_onnx=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 675f8771..44aef4ec 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 29329.0 - throughput: 34.09594599202155 + inference_time: 28971.0 + throughput: 34.51727589658624 estimated_peak_memory_range: - min: 4284416 - max: 12544472 + min: 4026368 + max: 22304944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,7 +55,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jgo2k07dp + job_id: jpy14xx8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -62,13 +64,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:23:54Z' + timestamp: '2024-11-09T22:13:19Z' - torchscript_onnx_tflite: - inference_time: 29111.0 - throughput: 34.35127614990897 + inference_time: 20306.0 + throughput: 49.246528119767554 estimated_peak_memory_range: - min: 4034560 - max: 12592296 + min: 2416640 + max: 241911808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -76,30 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jgjvzm68g - job_status: Passed - 
reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:23:56Z' - - torchscript_onnx_tflite: - inference_time: 27549.0 - throughput: 36.29895821989909 - estimated_peak_memory_range: - min: 4014080 - max: 239412720 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 337 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 337 - job_id: jgz3o9q65 + job_id: jp8q3xxkp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -108,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:23:58Z' + timestamp: '2024-11-09T22:13:21Z' - torchscript_onnx_tflite: - inference_time: 20535.0 - throughput: 48.69734599464329 + inference_time: 16820.0 + throughput: 59.45303210463734 estimated_peak_memory_range: - min: 3088384 - max: 165195440 + min: 3354624 + max: 170960800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -122,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jpy1rwl4p + job_id: j5q67yynp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -131,13 +110,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:24:09Z' + timestamp: '2024-11-09T22:13:22Z' - torchscript_onnx_tflite: - inference_time: 29132.0 - throughput: 34.32651379925855 + inference_time: 29100.0 + throughput: 34.36426116838488 estimated_peak_memory_range: - min: 4009984 - max: 23713472 + min: 3989504 + max: 12916360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -145,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jg9lj17vg + job_id: j56y3776p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: 
'2024-10-26T23:24:00Z' + timestamp: '2024-11-09T22:13:24Z' - torchscript_onnx_tflite: - inference_time: 28995.0 - throughput: 34.48870494912916 + inference_time: 28998.0 + throughput: 34.485136905993514 estimated_peak_memory_range: - min: 4022272 - max: 21595368 + min: 4001792 + max: 12116320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -168,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jgn6m2nv5 + job_id: jgo21rrqp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -177,13 +156,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:17:23Z' + timestamp: '2024-11-09T22:13:26Z' - torchscript_onnx_tflite: - inference_time: 28956.0 - throughput: 34.535156789611825 + inference_time: 28935.0 + throughput: 34.560221185415585 estimated_peak_memory_range: - min: 4009984 - max: 22890992 + min: 4026368 + max: 12726032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -191,7 +170,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jp2k98wxp + job_id: jgjv07rvg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -200,13 +179,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:17:25Z' + timestamp: '2024-11-09T22:13:28Z' - torchscript_onnx_tflite: - inference_time: 29243.0 - throughput: 34.196217898300446 + inference_time: 29074.0 + throughput: 34.39499208915182 estimated_peak_memory_range: - min: 3989504 - max: 12956776 + min: 4005888 + max: 12179296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -214,7 +193,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jp0z2yj25 + job_id: jgz3xmlo5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -223,13 +202,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:17:27Z' + timestamp: '2024-11-09T22:13:30Z' - torchscript_onnx_tflite: - 
inference_time: 32819.0 - throughput: 30.47015448368323 + inference_time: 36287.0 + throughput: 27.55807865075647 estimated_peak_memory_range: - min: 12288 - max: 229807824 + min: 4014080 + max: 164274880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -237,47 +216,47 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jprv8qreg + job_id: jg9l3mzwg job_status: Passed reference_device_info: - name: QCS8450 (Proxy) - os: '13' - form_factor: Xr + name: SA8295P ADP + os: '14' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:24:08Z' -- name: SAMEncoder - performance_metrics: + chipset: SA8295P + timestamp: '2024-11-09T22:13:32Z' - torchscript_onnx_tflite: - inference_time: 10962996.0 - throughput: 0.09121594133574436 + inference_time: 32956.0 + throughput: 30.34348828741352 estimated_peak_memory_range: - min: 129671168 - max: 133446632 - primary_compute_unit: CPU - precision: fp32 + min: 5107712 + max: 234925728 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jpv60oym5 + layers_on_npu: 337 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 337 + job_id: jgdxr3drp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 + name: QCS8450 (Proxy) os: '13' - form_factor: Phone + form_factor: Xr os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:23:55Z' + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-11-09T22:13:34Z' +- name: SAMEncoder + performance_metrics: - torchscript_onnx_tflite: - inference_time: 12098449.0 - throughput: 0.0826552229959394 + inference_time: 11666822.0 + throughput: 0.0857131445049903 estimated_peak_memory_range: - min: 129601536 - max: 132946592 + min: 129216512 + max: 134016832 primary_compute_unit: CPU precision: fp32 layer_info: @@ -285,7 +264,7 @@ models: layers_on_gpu: 36 
layers_on_cpu: 782 total_layers: 818 - job_id: jpede1005 + job_id: jp0z1jj95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -294,13 +273,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:23:56Z' + timestamp: '2024-11-09T22:13:19Z' - torchscript_onnx_tflite: - inference_time: 8407309.0 - throughput: 0.1189441235001592 + inference_time: 9304036.0 + throughput: 0.10748023760871088 estimated_peak_memory_range: - min: 115515392 - max: 1702190400 + min: 129339392 + max: 1711453184 primary_compute_unit: CPU precision: fp32 layer_info: @@ -308,7 +287,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 - job_id: j5we2v0j5 + job_id: jgkel44wg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -317,13 +296,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:23:58Z' + timestamp: '2024-11-09T22:13:21Z' - torchscript_onnx_tflite: - inference_time: 6618752.0 - throughput: 0.15108588446885454 + inference_time: 6695328.0 + throughput: 0.14935788059972566 estimated_peak_memory_range: - min: 127934464 - max: 1673110976 + min: 102780928 + max: 1651288528 primary_compute_unit: CPU precision: fp32 layer_info: @@ -331,7 +310,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 - job_id: jp0z3qwe5 + job_id: jglv0xxj5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -340,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:24:10Z' + timestamp: '2024-11-09T22:13:23Z' - torchscript_onnx_tflite: - inference_time: 10848584.0 - throughput: 0.09217792847435205 + inference_time: 11003431.0 + throughput: 0.0908807443787306 estimated_peak_memory_range: - min: 128131072 - max: 298752032 + min: 129163264 + max: 132715880 primary_compute_unit: CPU precision: fp32 layer_info: @@ -354,7 +333,7 @@ models: layers_on_gpu: 36 
layers_on_cpu: 782 total_layers: 818 - job_id: jp14ylklp + job_id: jp3j4993g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -363,13 +342,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:24:00Z' + timestamp: '2024-11-09T22:13:25Z' - torchscript_onnx_tflite: - inference_time: 10767075.0 - throughput: 0.09287573458901327 + inference_time: 10520745.0 + throughput: 0.09505030299660337 estimated_peak_memory_range: - min: 128987136 - max: 132344744 + min: 129433600 + max: 133032544 primary_compute_unit: CPU precision: fp32 layer_info: @@ -377,7 +356,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 - job_id: jprv2k0vg + job_id: j5mnwj9wp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +365,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:17:24Z' + timestamp: '2024-11-11T13:57:51Z' - torchscript_onnx_tflite: - inference_time: 11409163.0 - throughput: 0.0876488485614589 + inference_time: 10821202.0 + throughput: 0.09241117576402326 estimated_peak_memory_range: - min: 129753088 - max: 132982104 + min: 129155072 + max: 133129392 primary_compute_unit: CPU precision: fp32 layer_info: @@ -400,7 +379,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 - job_id: jpy1jexrp + job_id: jpedrz7o5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -409,13 +388,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:17:26Z' + timestamp: '2024-11-09T22:13:28Z' - torchscript_onnx_tflite: - inference_time: 11134085.0 - throughput: 0.08981429547196738 + inference_time: 10167032.0 + throughput: 0.0983571213309843 estimated_peak_memory_range: - min: 129359872 - max: 133218288 + min: 42749952 + max: 137887904 primary_compute_unit: CPU precision: fp32 layer_info: @@ -423,7 +402,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 
- job_id: jp8qmoxzp + job_id: j5wed7l35 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -432,13 +411,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:17:28Z' + timestamp: '2024-11-09T22:13:30Z' + - torchscript_onnx_tflite: + inference_time: 10764145.0 + throughput: 0.0929010153616474 + estimated_peak_memory_range: + min: 129937408 + max: 1726210080 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 36 + layers_on_cpu: 782 + total_layers: 818 + job_id: jp14djn8p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:13:32Z' - torchscript_onnx_tflite: - inference_time: 14506393.0 - throughput: 0.06893512398292256 + inference_time: 13901352.0 + throughput: 0.07193544915631228 estimated_peak_memory_range: - min: 59883520 - max: 1696776240 + min: 138633216 + max: 1777059136 primary_compute_unit: CPU precision: fp32 layer_info: @@ -446,7 +448,7 @@ models: layers_on_gpu: 36 layers_on_cpu: 782 total_layers: 818 - job_id: jp2k061mp + job_id: j57yj4ev5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -455,4 +457,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:24:08Z' + timestamp: '2024-11-09T22:13:34Z' diff --git a/qai_hub_models/models/sesr_m5/README.md b/qai_hub_models/models/sesr_m5/README.md index 2c7bfd68..29b27ffa 100644 --- a/qai_hub_models/models/sesr_m5/README.md +++ b/qai_hub_models/models/sesr_m5/README.md @@ -5,8 +5,7 @@ SESR M5 performs efficient on-device upscaling of images. -This is based on the implementation of SESR-M5 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sesr_m5). diff --git a/qai_hub_models/models/sesr_m5/evaluate.py b/qai_hub_models/models/sesr_m5/evaluate.py new file mode 100644 index 00000000..a09a3b92 --- /dev/null +++ b/qai_hub_models/models/sesr_m5/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.sesr_m5 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index ab8e4471..f6cf2127 100644 --- 
a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2279.0 - throughput: 438.7889425186485 + inference_time: 2225.0 + throughput: 449.438202247191 estimated_peak_memory_range: - min: 24576 - max: 1411328 + min: 16384 + max: 5454392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j5q6ekdmp + job_id: jpxk74x35 job_status: Passed torchscript_onnx_qnn: - inference_time: 2139.0 - throughput: 467.50818139317437 + inference_time: 2118.0 + throughput: 472.14353163361665 estimated_peak_memory_range: - min: 20480 - max: 4415696 + min: 24576 + max: 4580568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5we2vrj5 + job_id: j5q67yvnp job_status: Passed torchscript_onnx: - inference_time: 2724.0 - throughput: 367.1071953010279 + inference_time: 2693.0 + throughput: 371.3330857779428 estimated_peak_memory_range: - min: 12288 - max: 11904720 + min: 212992 + max: 1648400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jp2k06qmp + job_id: jg9l3mmwg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:22:49Z' + timestamp: '2024-11-09T22:12:15Z' - torchscript_onnx_tflite: - inference_time: 2214.0 - throughput: 451.6711833785005 + inference_time: 1595.0 + throughput: 
626.9592476489029 estimated_peak_memory_range: - min: 45056 - max: 4578144 + min: 20480 + max: 28600160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,67 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jglv6zql5 + job_id: j5mnwm8dp job_status: Passed torchscript_onnx_qnn: - inference_time: 2136.0 - throughput: 468.1647940074906 - estimated_peak_memory_range: - min: 28672 - max: 74882496 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 31 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 31 - job_id: jg9lj1qvg - job_status: Passed - torchscript_onnx: - inference_time: 2708.0 - throughput: 369.2762186115214 + inference_time: 1456.0 + throughput: 686.8131868131868 estimated_peak_memory_range: min: 0 - max: 1954752 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 33 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 33 - job_id: jpy1rwk4p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:22:50Z' - - torchscript_onnx_tflite: - inference_time: 1739.0 - throughput: 575.0431282346176 - estimated_peak_memory_range: - min: 16384 - max: 29223264 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 22 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 25 - job_id: j56yej07p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1657.0 - throughput: 603.5003017501509 - estimated_peak_memory_range: - min: 208896 - max: 16103376 + max: 15628336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp14ylmlp + job_id: jglv0xlj5 job_status: Passed torchscript_onnx: - inference_time: 2106.0 - throughput: 474.8338081671415 + inference_time: 1939.0 + throughput: 515.7297576070139 
estimated_peak_memory_range: min: 0 - max: 31603552 + max: 31909760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jp0z3q8e5 + job_id: jp14djj8p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -198,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:22:51Z' + timestamp: '2024-11-09T22:12:16Z' - torchscript_onnx_tflite: - inference_time: 1376.0 - throughput: 726.7441860465116 + inference_time: 1693.0 + throughput: 590.6674542232723 estimated_peak_memory_range: min: 12288 - max: 16725312 + max: 17057408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jgz3o9065 + job_id: jgn69nkk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1290.0 - throughput: 775.1937984496124 + inference_time: 1289.0 + throughput: 775.7951900698216 estimated_peak_memory_range: min: 0 - max: 9706016 + max: 10545888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jprv8qmeg + job_id: j56y37w6p job_status: Passed torchscript_onnx: - inference_time: 2003.0 - throughput: 499.2511233150275 + inference_time: 1999.0 + throughput: 500.25012506253125 estimated_peak_memory_range: min: 0 - max: 17477200 + max: 16926736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jgke7nwog + job_id: jgdxr33rp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -251,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:22:53Z' + timestamp: '2024-11-09T22:12:17Z' - torchscript_onnx_tflite: - inference_time: 2218.0 - throughput: 450.8566275924256 + inference_time: 
2234.0 + throughput: 447.6275738585497 estimated_peak_memory_range: - min: 16384 - max: 1267712 + min: 28672 + max: 2966696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jp3jv3rzg + job_id: jprv40w0g job_status: Passed torchscript_onnx_qnn: - inference_time: 2154.0 - throughput: 464.2525533890436 + inference_time: 2176.0 + throughput: 459.55882352941177 estimated_peak_memory_range: - min: 225280 - max: 1466072 + min: 233472 + max: 2006536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgdxe9mlp + job_id: jp3j4963g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -289,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:22:41Z' + timestamp: '2024-11-09T22:12:08Z' - torchscript_onnx_tflite: - inference_time: 2195.0 - throughput: 455.58086560364467 + inference_time: 2208.0 + throughput: 452.8985507246377 estimated_peak_memory_range: - min: 28672 - max: 8706640 + min: 32768 + max: 23467864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jgjv2xq7g + job_id: jp2k7werp job_status: Passed torchscript_onnx_qnn: - inference_time: 2143.0 - throughput: 466.63555762949136 + inference_time: 2141.0 + throughput: 467.07146193367583 estimated_peak_memory_range: - min: 221184 - max: 1521016 + min: 229376 + max: 4868080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5we3o4z5 + job_id: jpv61ddk5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -327,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:16:26Z' + timestamp: '2024-11-09T22:12:10Z' - 
torchscript_onnx_tflite: - inference_time: 2190.0 - throughput: 456.62100456621005 + inference_time: 2203.0 + throughput: 453.92646391284615 estimated_peak_memory_range: - min: 24576 - max: 1456184 + min: 28672 + max: 8778272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jpedw9y75 + job_id: jpy14xm8p job_status: Passed torchscript_onnx_qnn: inference_time: 2146.0 throughput: 465.98322460391427 estimated_peak_memory_range: min: 229376 - max: 1558488 + max: 1825624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jg9lyvdqg + job_id: jgjv077vg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -365,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:16:27Z' + timestamp: '2024-11-09T22:12:11Z' - torchscript_onnx_tflite: - inference_time: 2219.0 - throughput: 450.6534474988734 + inference_time: 2149.0 + throughput: 465.33271288971616 estimated_peak_memory_range: min: 32768 - max: 1361064 + max: 6659648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jgz3jenz5 + job_id: jp0z1j695 job_status: Passed torchscript_onnx_qnn: - inference_time: 2146.0 - throughput: 465.98322460391427 + inference_time: 2151.0 + throughput: 464.9000464900046 estimated_peak_memory_range: min: 229376 - max: 1628712 + max: 1557992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp14w06kp + job_id: jpedrzzo5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -403,13 +352,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:16:28Z' + timestamp: '2024-11-09T22:12:12Z' - 
torchscript_onnx_tflite: - inference_time: 4157.0 - throughput: 240.55809477988933 + inference_time: 4143.0 + throughput: 241.3709872073377 estimated_peak_memory_range: - min: 6336512 - max: 35313232 + min: 6311936 + max: 22317136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +366,52 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jpede1n05 + job_id: jp8q3x1kp job_status: Passed torchscript_onnx_qnn: - inference_time: 3217.0 - throughput: 310.8486167236556 + inference_time: 3474.0 + throughput: 287.85261945883707 estimated_peak_memory_range: - min: 204800 - max: 15591504 + min: 0 + max: 5964176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 31 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31 + job_id: jgz3xmmo5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:12:13Z' + - torchscript_onnx_tflite: + inference_time: 3400.0 + throughput: 294.11764705882354 + estimated_peak_memory_range: + min: 6324224 + max: 35041104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 22 + layers_on_gpu: 0 + layers_on_cpu: 3 + total_layers: 25 + job_id: jgkel48wg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3144.0 + throughput: 318.06615776081424 + estimated_peak_memory_range: + min: 208896 + max: 16070592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgn6lydm5 + job_id: j5wed7735 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:22:47Z' + timestamp: '2024-11-09T22:12:14Z' - torchscript_onnx_qnn: - inference_time: 2377.0 - throughput: 420.69835927639883 + inference_time: 2357.0 + throughput: 
424.26813746287655 estimated_peak_memory_range: min: 221184 max: 221184 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j57y0w1r5 + job_id: jgo21r8qp job_status: Passed torchscript_onnx: - inference_time: 3675.0 - throughput: 272.10884353741494 + inference_time: 2955.0 + throughput: 338.40947546531305 estimated_peak_memory_range: - min: 8978432 - max: 8978432 + min: 8933376 + max: 8933376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jp8q09d8p + job_id: j57yj44v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:22:52Z' + timestamp: '2024-11-09T22:12:18Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/README.md b/qai_hub_models/models/sesr_m5_quantized/README.md index 4af66e25..429a58fc 100644 --- a/qai_hub_models/models/sesr_m5_quantized/README.md +++ b/qai_hub_models/models/sesr_m5_quantized/README.md @@ -5,8 +5,7 @@ SESR M5 performs efficient on-device upscaling of images. -This is based on the implementation of SESR-M5-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sesr_m5_quantized). 
diff --git a/qai_hub_models/models/sesr_m5_quantized/conftest.py b/qai_hub_models/models/sesr_m5_quantized/conftest.py index dfe40840..ff9cd700 100644 --- a/qai_hub_models/models/sesr_m5_quantized/conftest.py +++ b/qai_hub_models/models/sesr_m5_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.sesr_m5_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. @@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/sesr_m5_quantized/evaluate.py b/qai_hub_models/models/sesr_m5_quantized/evaluate.py new file mode 100644 index 00000000..19b9e1c8 --- /dev/null +++ b/qai_hub_models/models/sesr_m5_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.sesr_m5_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index e1a34d1a..bcdccbf9 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.sesr_m5_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ can_access_qualcomm_ai_hub, 
export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. 
@@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "sesr_m5_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "bsd300", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. 
Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. 
Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index c6638c3d..667b0453 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -4,73 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, -) - -# isort: on - -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.sesr_m5.model import SESR_M5 -from qai_hub_models.utils.aimet.config_loader import get_default_per_tensor_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.quantization_aimet import ( - constrain_quantized_inputs_to_image_range, -) +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 5 -DEFAULT_ENCODINGS = "sesr_m5_quantized_encodings.json" - - -class SESR_M5Quantizable(AIMETQuantizableMixin, SESR_M5): - """SESR_M5 with post train quantization support. - - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
- Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, - sesr_model: QuantizationSimModel, - scale_factor: int, - ) -> None: - SESR_M5.__init__(self, sesr_model.model, scale_factor) - AIMETQuantizableMixin.__init__(self, sesr_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - scale_factor: int = DEFAULT_SCALE_FACTOR, - ) -> SESR_M5Quantizable: - # Load Model - sesr = SESR_M5.from_pretrained(scale_factor) - input_shape = SESR_M5.get_input_spec()["image"][0] - sesr = prepare_model(sesr) - equalize_model(sesr, input_shape) - - sim = QuantizationSimModel( - sesr, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_per_tensor_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - return cls(sim, scale_factor) +class SESR_M5Quantizable(HubQuantizableMixin, SESR_M5): + pass diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index b0c73c6f..537a2777 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -46,64 +46,11 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1521.0 - throughput: 657.4621959237344 - estimated_peak_memory_range: - min: 270336 - max: 1761096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 24 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 27 - job_id: jpxknjr95 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 976.0 - throughput: 1024.5901639344263 - estimated_peak_memory_range: - min: 16384 - max: 
3760752 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 31 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 31 - job_id: jp3jv37zg - job_status: Passed - torchscript_onnx: - inference_time: 1087.0 - throughput: 919.9632014719411 + inference_time: 1360.0 + throughput: 735.2941176470588 estimated_peak_memory_range: min: 12288 - max: 16713952 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 48 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 48 - job_id: jpxknjz95 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:22:09Z' - - torchscript_onnx_tflite: - inference_time: 1337.0 - throughput: 747.9431563201197 - estimated_peak_memory_range: - min: 28672 - max: 53439416 + max: 2760616 primary_compute_unit: NPU precision: int8 layer_info: @@ -111,14 +58,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: j5mnq2kqp + job_id: jpxk7z395 job_status: Passed torchscript_onnx_qnn: - inference_time: 975.0 - throughput: 1025.6410256410256 + inference_time: 977.0 + throughput: 1023.5414534288639 estimated_peak_memory_range: - min: 12288 - max: 10440512 + min: 20480 + max: 10034960 primary_compute_unit: NPU precision: int8 layer_info: @@ -126,22 +73,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgo2k0wdp + job_id: jp3j4rxzg job_status: Passed torchscript_onnx: - inference_time: 1077.0 - throughput: 928.5051067780872 + inference_time: 3084.0 + throughput: 324.25421530479895 estimated_peak_memory_range: min: 69632 - max: 1427400 + max: 2783528 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 48 + layers_on_npu: 73 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 48 - job_id: j5mnq2lqp + layers_on_cpu: 8 + total_layers: 81 + job_id: jg9l3qxlg job_status: Passed reference_device_info: name: 
Samsung Galaxy S23 @@ -150,13 +97,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:22:10Z' + timestamp: '2024-11-09T23:31:25Z' - torchscript_onnx_tflite: - inference_time: 1124.0 - throughput: 889.6797153024911 + inference_time: 1120.0 + throughput: 892.8571428571429 estimated_peak_memory_range: min: 20480 - max: 26610432 + max: 26941072 primary_compute_unit: NPU precision: int8 layer_info: @@ -164,14 +111,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jgn6lyqm5 + job_id: j5mnwloqp job_status: Passed torchscript_onnx_qnn: - inference_time: 709.0 - throughput: 1410.4372355430182 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 65536 - max: 16567280 + min: 61440 + max: 12866832 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,22 +126,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpv60omm5 + job_id: jgo219odp job_status: Passed torchscript_onnx: - inference_time: 833.0 - throughput: 1200.4801920768307 + inference_time: 2217.0 + throughput: 451.05999097880016 estimated_peak_memory_range: min: 0 - max: 31195472 + max: 78103568 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 48 + layers_on_npu: 73 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 48 - job_id: jgn6lywm5 + layers_on_cpu: 8 + total_layers: 81 + job_id: jp14dmv2p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -203,13 +150,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:22:11Z' + timestamp: '2024-11-09T23:31:28Z' - torchscript_onnx_tflite: - inference_time: 1490.0 - throughput: 671.1409395973154 + inference_time: 1592.0 + throughput: 628.1407035175879 estimated_peak_memory_range: - min: 12288 - max: 17977712 + min: 8192 + max: 18601696 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,14 +164,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jglv6z8l5 + job_id: jgn69wom5 job_status: Passed torchscript_onnx_qnn: - inference_time: 604.0 - throughput: 1655.6291390728477 + inference_time: 728.0 + throughput: 1373.6263736263736 estimated_peak_memory_range: - min: 61440 - max: 11942656 + min: 57344 + max: 12502320 primary_compute_unit: NPU precision: int8 layer_info: @@ -232,22 +179,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j57y0w8r5 + job_id: jpv61nem5 job_status: Passed torchscript_onnx: - inference_time: 612.0 - throughput: 1633.986928104575 + inference_time: 2049.0 + throughput: 488.0429477794046 estimated_peak_memory_range: - min: 57344 - max: 21326416 + min: 8192 + max: 59810464 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 48 + layers_on_npu: 73 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 48 - job_id: jp2k06zmp + layers_on_cpu: 8 + total_layers: 81 + job_id: jgdxrmzep job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -256,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:22:13Z' + timestamp: '2024-11-09T23:31:31Z' - torchscript_onnx_tflite: - inference_time: 4154.0 - throughput: 240.73182474723157 + inference_time: 3602.0 + throughput: 277.623542476402 estimated_peak_memory_range: - min: 1585152 - max: 21395104 + min: 1622016 + max: 21300624 primary_compute_unit: NPU precision: int8 layer_info: @@ -270,14 +217,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jprv8qdeg + job_id: jprv47oeg job_status: Passed torchscript_onnx_qnn: - inference_time: 2925.0 - throughput: 341.88034188034186 + inference_time: 3106.0 + throughput: 321.9575016097875 estimated_peak_memory_range: - min: 61440 - max: 7566816 + min: 65536 + max: 8025216 primary_compute_unit: NPU precision: int8 layer_info: @@ -285,7 +232,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - 
job_id: jgjvzmy8g + job_id: jgjv08o8g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -294,13 +241,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:21:59Z' + timestamp: '2024-11-09T23:31:03Z' - torchscript_onnx_tflite: - inference_time: 20540.0 - throughput: 48.685491723466406 + inference_time: 21818.0 + throughput: 45.833715280960675 estimated_peak_memory_range: - min: 1265664 - max: 9447488 + min: 1372160 + max: 4572216 primary_compute_unit: NPU precision: int8 layer_info: @@ -308,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp2k06dmp + job_id: jp2k7z4mp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -317,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:21:47Z' + timestamp: '2024-11-09T23:30:35Z' - torchscript_onnx_tflite: - inference_time: 1341.0 - throughput: 745.7121551081283 + inference_time: 1334.0 + throughput: 749.6251874062968 estimated_peak_memory_range: - min: 16384 - max: 1378592 + min: 12288 + max: 1421088 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jpy1rw24p + job_id: jpy14yq4p job_status: Passed torchscript_onnx_qnn: - inference_time: 684.0 - throughput: 1461.9883040935672 + inference_time: 689.0 + throughput: 1451.3788098693758 estimated_peak_memory_range: - min: 81920 - max: 1371400 + min: 77824 + max: 2109720 primary_compute_unit: NPU precision: int8 layer_info: @@ -346,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpede1x05 + job_id: jpedrn805 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -355,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:22:00Z' + timestamp: '2024-11-09T23:31:05Z' - torchscript_onnx_tflite: - inference_time: 1353.0 - 
throughput: 739.0983000739099 + inference_time: 1350.0 + throughput: 740.7407407407408 estimated_peak_memory_range: min: 12288 - max: 3358320 + max: 77741792 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp2k98exp + job_id: jp0z1xde5 job_status: Passed torchscript_onnx_qnn: - inference_time: 682.0 - throughput: 1466.275659824047 + inference_time: 692.0 + throughput: 1445.086705202312 estimated_peak_memory_range: min: 81920 - max: 1457032 + max: 1367712 primary_compute_unit: NPU precision: int8 layer_info: @@ -384,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgkeqz8yg + job_id: j5wedr8j5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -393,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:16:07Z' + timestamp: '2024-11-09T23:31:11Z' - torchscript_onnx_tflite: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 12288 - max: 76827200 + min: 61440 + max: 1842928 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jpy1jemrp + job_id: jp8q3k68p job_status: Passed torchscript_onnx_qnn: - inference_time: 682.0 - throughput: 1466.275659824047 + inference_time: 692.0 + throughput: 1445.086705202312 estimated_peak_memory_range: - min: 81920 - max: 1441336 + min: 73728 + max: 1392112 primary_compute_unit: NPU precision: int8 layer_info: @@ -422,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5q6r8v7p + job_id: jg9l3qkvg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -431,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:16:08Z' + timestamp: '2024-11-09T23:31:14Z' - 
torchscript_onnx_tflite: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1362.0 + throughput: 734.2143906020558 estimated_peak_memory_range: - min: 57344 - max: 2034976 + min: 28672 + max: 1404472 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp8qmo1zp + job_id: jgkelkoog job_status: Passed torchscript_onnx_qnn: - inference_time: 693.0 - throughput: 1443.001443001443 + inference_time: 696.0 + throughput: 1436.7816091954023 estimated_peak_memory_range: - min: 81920 - max: 1203456 + min: 98304 + max: 1703808 primary_compute_unit: NPU precision: int8 layer_info: @@ -460,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j56yz6wvp + job_id: jp14dm7lp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -469,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:16:10Z' + timestamp: '2024-11-09T23:31:17Z' - torchscript_onnx_tflite: - inference_time: 2924.0 - throughput: 341.9972640218878 + inference_time: 2612.0 + throughput: 382.84839203675347 estimated_peak_memory_range: - min: 1593344 - max: 19405472 + min: 1613824 + max: 20124768 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp0z2y625 + job_id: j5q67dzmp job_status: Passed torchscript_onnx_qnn: - inference_time: 2013.0 - throughput: 496.7709885742673 + inference_time: 1545.0 + throughput: 647.2491909385113 estimated_peak_memory_range: min: 12288 - max: 5682512 + max: 5810960 primary_compute_unit: NPU precision: int8 layer_info: @@ -498,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jglv2nle5 + job_id: jgdxrm8lp job_status: Passed reference_device_info: name: SA8295P ADP @@ -507,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - 
timestamp: '2024-10-30T01:16:09Z' + timestamp: '2024-11-09T23:31:20Z' - torchscript_onnx_tflite: - inference_time: 1702.0 - throughput: 587.5440658049354 + inference_time: 1730.0 + throughput: 578.0346820809249 estimated_peak_memory_range: - min: 16384 - max: 27117488 + min: 12288 + max: 27350560 primary_compute_unit: NPU precision: int8 layer_info: @@ -521,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: j5q6ek1mp + job_id: jglv0qol5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1104.0 - throughput: 905.7971014492754 + inference_time: 1125.0 + throughput: 888.8888888888889 estimated_peak_memory_range: - min: 61440 - max: 17620448 + min: 65536 + max: 16599248 primary_compute_unit: NPU precision: int8 layer_info: @@ -536,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgdxe94lp + job_id: j5wedr165 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -545,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:22:06Z' + timestamp: '2024-11-09T23:31:22Z' - torchscript_onnx_qnn: - inference_time: 830.0 - throughput: 1204.8192771084337 + inference_time: 829.0 + throughput: 1206.2726176115802 estimated_peak_memory_range: - min: 249856 - max: 249856 + min: 163840 + max: 163840 primary_compute_unit: NPU precision: int8 layer_info: @@ -559,22 +506,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgz3o9y65 + job_id: jgz3x0865 job_status: Passed torchscript_onnx: - inference_time: 1228.0 - throughput: 814.3322475570033 + inference_time: 3574.0 + throughput: 279.79854504756577 estimated_peak_memory_range: - min: 3375104 - max: 3375104 + min: 2351104 + max: 2351104 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 48 + layers_on_npu: 73 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 48 - job_id: jprv8q7eg + layers_on_cpu: 8 + total_layers: 81 + job_id: j57yj87l5 job_status: 
Passed reference_device_info: name: Snapdragon X Elite CRD @@ -583,4 +530,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:22:12Z' + timestamp: '2024-11-09T23:31:34Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/test.py b/qai_hub_models/models/sesr_m5_quantized/test.py deleted file mode 100644 index 0ac444df..00000000 --- a/qai_hub_models/models/sesr_m5_quantized/test.py +++ /dev/null @@ -1,89 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -import os -import zipfile - -import numpy as np -import pytest -import torch - -from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS -from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID -from qai_hub_models.models.sesr_m5_quantized.demo import main as demo_main -from qai_hub_models.models.sesr_m5_quantized.model import SESR_M5Quantizable -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_LOCAL_PATH = "sesr_m5_demo_output.png" -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. 
- torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - model = SESR_M5Quantizable.from_pretrained() - app = SuperResolutionApp(model=model) - app_output_image = app.predict(image)[0] - - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@pytest.mark.trace -@skip_clone_repo_check -def test_trace(): - image = load_image(IMAGE_ADDRESS) - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - app = SuperResolutionApp( - SESR_M5Quantizable.from_pretrained().convert_to_torchscript() - ) - app_output_image = app.predict(image)[0] - - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=0.005, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_aimet_export(): - model = SESR_M5Quantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - model.get_input_spec(), - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert f"{name}.aimet/" in zip.namelist() - assert f"{name}.aimet/{name}.encodings" in zip.namelist() - assert f"{name}.aimet/{name}.onnx" in zip.namelist() - assert len(zip.filelist) == 3 - - # No test of torchscipt and aimet encodings due to #8954 - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/shufflenet_v2/README.md b/qai_hub_models/models/shufflenet_v2/README.md index 87bbee99..091e4bad 100644 --- a/qai_hub_models/models/shufflenet_v2/README.md +++ b/qai_hub_models/models/shufflenet_v2/README.md @@ -5,8 +5,7 @@ ShufflenetV2 is a machine learning model that can classify images from the Imagenet dataset. 
It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Shufflenet-v2 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/shufflenet_v2). diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index f74a480c..44fe24a6 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1202.0 + throughput: 831.9467554076539 estimated_peak_memory_range: - min: 20480 - max: 1556000 + min: 24576 + max: 1424392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jpxknjlj5 + job_id: jp14djv8p job_status: Passed torchscript_onnx_qnn: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 775.0 + throughput: 1290.3225806451612 estimated_peak_memory_range: - min: 16384 - max: 5208744 + min: 630784 + max: 36177664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j5q6ek47p + job_id: jpy14x78p job_status: Passed torchscript_onnx: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1089.0 + 
throughput: 918.2736455463728 estimated_peak_memory_range: - min: 618496 - max: 2161336 + min: 12288 + max: 4560424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jg9lj1oqg + job_id: jgjv07qvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:21:22Z' + timestamp: '2024-11-09T22:11:35Z' - torchscript_onnx_tflite: - inference_time: 1200.0 - throughput: 833.3333333333334 + inference_time: 783.0 + throughput: 1277.139208173691 estimated_peak_memory_range: - min: 12288 - max: 12279816 + min: 16384 + max: 40246720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j5mnq20yp + job_id: jgdxr3zrp job_status: Passed torchscript_onnx_qnn: - inference_time: 773.0 - throughput: 1293.6610608020699 + inference_time: 510.0 + throughput: 1960.7843137254902 estimated_peak_memory_range: - min: 606208 - max: 63664064 + min: 0 + max: 12193536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jglv6zwe5 + job_id: jp0z1jv95 job_status: Passed torchscript_onnx: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 721.0 + throughput: 1386.9625520110958 estimated_peak_memory_range: - min: 16384 - max: 4585648 + min: 0 + max: 42288160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jp14ylokp + job_id: jpedrzyo5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:21:23Z' + 
chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:11:36Z' - torchscript_onnx_tflite: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: - min: 12288 - max: 39933312 + min: 8192 + max: 21982928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgn6ly1v5 + job_id: j57yj47v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 635.0 - throughput: 1574.8031496062993 + inference_time: 498.0 + throughput: 2008.0321285140562 estimated_peak_memory_range: - min: 618496 - max: 12829056 + min: 0 + max: 10269136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j56yejovp + job_id: jp8q3x4kp job_status: Passed torchscript_onnx: - inference_time: 737.0 - throughput: 1356.85210312076 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: min: 0 - max: 42331232 + max: 23693152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jgdxe96kp + job_id: jgz3xmno5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:21:24Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:11:37Z' - torchscript_onnx_tflite: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: - min: 12288 - max: 22123168 + min: 20480 + max: 1504160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgke7n6yg + 
job_id: jp4lx1985 job_status: Passed torchscript_onnx_qnn: - inference_time: 497.0 - throughput: 2012.0724346076458 + inference_time: 729.0 + throughput: 1371.7421124828531 estimated_peak_memory_range: - min: 614400 - max: 11176336 + min: 651264 + max: 1780952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j5we2vyz5 - job_status: Passed - torchscript_onnx: - inference_time: 786.0 - throughput: 1272.264631043257 - estimated_peak_memory_range: - min: 0 - max: 23137888 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 223 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 223 - job_id: jg9lj1ovg + job_id: jgkel49wg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:21:26Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:11:27Z' - torchscript_onnx_tflite: - inference_time: 1201.0 - throughput: 832.6394671107411 + inference_time: 1198.0 + throughput: 834.7245409015025 estimated_peak_memory_range: min: 20480 - max: 1790240 + max: 1468080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jprv8qxvg + job_id: jpxk74d35 job_status: Passed torchscript_onnx_qnn: - inference_time: 735.0 - throughput: 1360.544217687075 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 638976 - max: 2329760 + min: 634880 + max: 1946416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp3jv3oxg + job_id: jglv0x1j5 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) 
+ os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:21:14Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:11:30Z' - torchscript_onnx_tflite: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1203.0 + throughput: 831.255195344971 estimated_peak_memory_range: - min: 16384 - max: 9330712 + min: 53248 + max: 1456336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j5we3o1z5 + job_id: j5mnwmddp job_status: Passed torchscript_onnx_qnn: - inference_time: 746.0 - throughput: 1340.4825737265417 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 643072 - max: 2204816 + min: 626688 + max: 1903520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j57ylz7q5 + job_id: j56y37d6p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:15:47Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:11:31Z' - torchscript_onnx_tflite: inference_time: 1201.0 throughput: 832.6394671107411 estimated_peak_memory_range: - min: 16384 - max: 5481008 + min: 65536 + max: 2638880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jg9lyvxqg + job_id: jgn69n7k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 735.0 - throughput: 1360.544217687075 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 634880 - max: 1906016 + min: 643072 + max: 1959136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 158 - job_id: jp4ldq9q5 + job_id: jp3j49w3g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:15:48Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:11:32Z' - torchscript_onnx_tflite: - inference_time: 1200.0 - throughput: 833.3333333333334 + inference_time: 1787.0 + throughput: 559.5970900951315 estimated_peak_memory_range: - min: 36864 - max: 109456936 + min: 12288 + max: 20826608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgdxqwzkp + job_id: jprv40n0g job_status: Passed torchscript_onnx_qnn: - inference_time: 743.0 - throughput: 1345.8950201884254 + inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: - min: 643072 - max: 1867416 + min: 0 + max: 5939552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpxk6vdj5 + job_id: jgo21r4qp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:15:49Z' + chipset: SA8295P + timestamp: '2024-11-09T22:11:33Z' - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: min: 16384 - max: 40439232 + max: 41263920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jp8q09jzp + job_id: jp2k7wvrp job_status: Passed torchscript_onnx_qnn: - inference_time: 887.0 - throughput: 1127.3957158962796 + inference_time: 882.0 + throughput: 1133.7868480725624 
estimated_peak_memory_range: - min: 634880 - max: 15480352 + min: 618496 + max: 15392320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgz3o9zz5 + job_id: jpv61d7k5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:21:20Z' + timestamp: '2024-11-09T22:11:34Z' - torchscript_onnx_qnn: - inference_time: 889.0 - throughput: 1124.859392575928 + inference_time: 884.0 + throughput: 1131.2217194570135 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgo2k0d4p + job_id: j5q67ymnp job_status: Passed torchscript_onnx: - inference_time: 1133.0 - throughput: 882.61253309797 + inference_time: 1115.0 + throughput: 896.8609865470852 estimated_peak_memory_range: - min: 3104768 - max: 3104768 + min: 4263936 + max: 4263936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j5we2vyj5 + job_id: j5wed7435 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:21:25Z' + timestamp: '2024-11-09T22:11:38Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/README.md b/qai_hub_models/models/shufflenet_v2_quantized/README.md index 76982825..6ce74ebd 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/README.md +++ b/qai_hub_models/models/shufflenet_v2_quantized/README.md @@ -5,8 +5,7 @@ ShufflenetV2 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. 
-This is based on the implementation of Shufflenet-v2Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/shufflenet_v2_quantized). diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index d31220e8..535339c3 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 612.0 - throughput: 1633.986928104575 + inference_time: 609.0 + throughput: 1642.0361247947455 estimated_peak_memory_range: min: 12288 - max: 74987296 + max: 1420592 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jp14w2okp + job_id: j5wedrmj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 593.0 - throughput: 1686.3406408094436 + inference_time: 592.0 + throughput: 1689.1891891891892 estimated_peak_memory_range: - min: 2125824 - max: 10236936 + min: 20480 + max: 3756528 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,22 +76,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgkeqv0yg + job_id: jpy14yn4p job_status: Passed torchscript_onnx: - inference_time: 8687.0 - throughput: 115.11453896627144 + inference_time: 13536.0 + throughput: 73.87706855791963 estimated_peak_memory_range: - min: 1011712 - max: 29725672 + min: 20480 + max: 13087288 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 210 + layers_on_npu: 397 layers_on_gpu: 0 - layers_on_cpu: 5 - total_layers: 215 - job_id: jp14w21kp + 
layers_on_cpu: 56 + total_layers: 453 + job_id: jpedrn205 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -100,66 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:40:44Z' + timestamp: '2024-11-09T23:29:55Z' - torchscript_onnx_tflite: - inference_time: 616.0 - throughput: 1623.3766233766235 - estimated_peak_memory_range: - min: 16384 - max: 1635360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 233 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 233 - job_id: jgdxqn6kp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 598.0 - throughput: 1672.2408026755852 - estimated_peak_memory_range: - min: 163840 - max: 80811104 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 160 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 160 - job_id: j5q6r017p - job_status: Passed - torchscript_onnx: - inference_time: 8840.0 - throughput: 113.12217194570135 - estimated_peak_memory_range: - min: 1056768 - max: 4310792 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 210 - layers_on_gpu: 0 - layers_on_cpu: 5 - total_layers: 215 - job_id: jgdxqn4kp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:40:46Z' - - torchscript_onnx_tflite: - inference_time: 424.0 - throughput: 2358.490566037736 + inference_time: 433.0 + throughput: 2309.4688221709007 estimated_peak_memory_range: min: 12288 - max: 29689456 + max: 28673696 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: j57yl2oq5 + job_id: jg9l3q9vg job_status: Passed torchscript_onnx_qnn: - inference_time: 438.0 - throughput: 2283.10502283105 + inference_time: 427.0 + throughput: 2341.92037470726 
estimated_peak_memory_range: - min: 159744 - max: 13626448 + min: 0 + max: 14229808 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +129,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jglv248e5 + job_id: jp0z1xke5 job_status: Passed torchscript_onnx: - inference_time: 6900.0 - throughput: 144.92753623188406 + inference_time: 11339.0 + throughput: 88.19119851838786 estimated_peak_memory_range: - min: 872448 - max: 357159328 + min: 2088960 + max: 506924304 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 210 + layers_on_npu: 397 layers_on_gpu: 0 - layers_on_cpu: 5 - total_layers: 215 - job_id: j57yl2nq5 + layers_on_cpu: 56 + total_layers: 453 + job_id: jgz3x0w65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +153,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:40:48Z' + timestamp: '2024-11-09T23:29:57Z' - torchscript_onnx_tflite: - inference_time: 471.0 - throughput: 2123.1422505307855 + inference_time: 472.0 + throughput: 2118.64406779661 estimated_peak_memory_range: min: 8192 - max: 20983344 + max: 20785440 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jp0z24925 + job_id: jp14dmqlp job_status: Passed torchscript_onnx_qnn: - inference_time: 392.0 - throughput: 2551.0204081632655 + inference_time: 379.0 + throughput: 2638.5224274406332 estimated_peak_memory_range: min: 0 - max: 10151104 + max: 10968448 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +182,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: j5we3wzz5 + job_id: jp8q3k88p job_status: Passed torchscript_onnx: - inference_time: 6541.0 - throughput: 152.88182235132243 + inference_time: 12276.0 + throughput: 81.45975887911372 estimated_peak_memory_range: - min: 1122304 - max: 287754128 + min: 2052096 + max: 
474428096 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 210 + layers_on_npu: 397 layers_on_gpu: 0 - layers_on_cpu: 5 - total_layers: 215 - job_id: jpxk69rj5 + layers_on_cpu: 56 + total_layers: 453 + job_id: j5wedrxj5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:40:51Z' + timestamp: '2024-11-09T23:29:58Z' - torchscript_onnx_tflite: - inference_time: 926.0 - throughput: 1079.913606911447 + inference_time: 905.0 + throughput: 1104.9723756906078 estimated_peak_memory_range: min: 12288 - max: 22068000 + max: 21670448 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jp4ldneq5 + job_id: jgdxrm7lp job_status: Passed torchscript_onnx_qnn: - inference_time: 1142.0 - throughput: 875.6567425569177 + inference_time: 1150.0 + throughput: 869.5652173913044 estimated_peak_memory_range: - min: 16384 - max: 8272000 + min: 12288 + max: 8695776 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: j56yz2mvp + job_id: jgkelkdog job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:40:28Z' + timestamp: '2024-11-09T23:29:40Z' - torchscript_onnx_tflite: - inference_time: 10597.0 - throughput: 94.36633009342266 + inference_time: 9722.0 + throughput: 102.85949393128986 estimated_peak_memory_range: - min: 126976 - max: 12958720 + min: 36864 + max: 5139744 primary_compute_unit: CPU precision: fp32 layer_info: @@ -311,7 +258,7 @@ models: layers_on_gpu: 11 layers_on_cpu: 178 total_layers: 233 - job_id: jpxk690j5 + job_id: j57yj86r5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 
+267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:40:09Z' + timestamp: '2024-11-09T23:29:22Z' - torchscript_onnx_tflite: - inference_time: 611.0 - throughput: 1636.6612111292961 + inference_time: 613.0 + throughput: 1631.3213703099511 estimated_peak_memory_range: - min: 12288 - max: 1436280 + min: 16384 + max: 1344272 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: j5mn6e9yp + job_id: jp4lx28l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 544.0 - throughput: 1838.235294117647 + inference_time: 540.0 + throughput: 1851.851851851852 estimated_peak_memory_range: - min: 0 - max: 1668096 + min: 184320 + max: 1750304 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jp3j1n7xg + job_id: j5q67dwmp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:40:30Z' + timestamp: '2024-11-09T23:29:42Z' - torchscript_onnx_tflite: - inference_time: 606.0 - throughput: 1650.1650165016501 + inference_time: 612.0 + throughput: 1633.986928104575 estimated_peak_memory_range: - min: 16384 - max: 75068976 + min: 12288 + max: 11542832 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: j5q6rkj4p + job_id: jpxk7zm95 job_status: Passed torchscript_onnx_qnn: - inference_time: 548.0 - throughput: 1824.8175182481752 + inference_time: 544.0 + throughput: 1838.235294117647 estimated_peak_memory_range: - min: 184320 - max: 1687472 + min: 2117632 + max: 3351384 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgo2n0jxp + 
job_id: j56y30v7p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:02:47Z' + timestamp: '2024-11-09T23:29:46Z' - torchscript_onnx_tflite: - inference_time: 616.0 - throughput: 1623.3766233766235 + inference_time: 614.0 + throughput: 1628.6644951140065 estimated_peak_memory_range: min: 12288 - max: 1482208 + max: 1564600 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jglv2zj85 + job_id: j5mnwl4qp job_status: Passed torchscript_onnx_qnn: - inference_time: 550.0 - throughput: 1818.1818181818182 + inference_time: 541.0 + throughput: 1848.4288354898335 estimated_peak_memory_range: - min: 192512 - max: 1492440 + min: 184320 + max: 1383976 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jpv6rojj5 + job_id: jp3j4r8zg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:02:48Z' + timestamp: '2024-11-09T23:29:47Z' - torchscript_onnx_tflite: - inference_time: 611.0 - throughput: 1636.6612111292961 + inference_time: 612.0 + throughput: 1633.986928104575 estimated_peak_memory_range: min: 12288 - max: 9110904 + max: 1749576 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jp3j13ylg + job_id: jgn69wxm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 548.0 - throughput: 1824.8175182481752 + inference_time: 545.0 + throughput: 1834.8623853211009 estimated_peak_memory_range: - min: 188416 - max: 1561808 + min: 24576 + max: 1584200 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +410,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jpedw1j15 + job_id: jgo219mdp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:02:52Z' + timestamp: '2024-11-09T23:29:49Z' - torchscript_onnx_tflite: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: min: 12288 - max: 19928704 + max: 19860064 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: j56yzjk0p + job_id: jprv479eg job_status: Passed torchscript_onnx_qnn: inference_time: 999.0 throughput: 1001.001001001001 estimated_peak_memory_range: min: 0 - max: 5909936 + max: 5867408 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgjv2mjxg + job_id: jpv61n4m5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +457,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:02:50Z' + timestamp: '2024-11-09T23:29:51Z' - torchscript_onnx_tflite: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 648.0 + throughput: 1543.20987654321 estimated_peak_memory_range: min: 16384 - max: 30163792 + max: 30243344 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +471,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 233 - job_id: jpy1jz2rp + job_id: jp2k7zjmp job_status: Passed torchscript_onnx_qnn: - inference_time: 661.0 - throughput: 1512.8593040847202 + inference_time: 636.0 + throughput: 1572.3270440251572 estimated_peak_memory_range: min: 159744 - max: 16048656 + max: 16391456 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - 
job_id: jgz3j2yz5 + job_id: jgjv0818g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +495,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:40:39Z' + timestamp: '2024-11-09T23:29:53Z' - torchscript_onnx_qnn: - inference_time: 669.0 - throughput: 1494.7683109118086 + inference_time: 673.0 + throughput: 1485.8841010401188 estimated_peak_memory_range: - min: 622592 - max: 622592 + min: 512000 + max: 512000 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgo2nzw4p + job_id: jglv0q7l5 job_status: Passed torchscript_onnx: - inference_time: 10313.0 - throughput: 96.9649956365752 + inference_time: 15566.0 + throughput: 64.24257998201207 estimated_peak_memory_range: - min: 6819840 - max: 6819840 + min: 10137600 + max: 10137600 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 210 + layers_on_npu: 397 layers_on_gpu: 0 - layers_on_cpu: 5 - total_layers: 215 - job_id: jp4ldn4q5 + layers_on_cpu: 56 + total_layers: 453 + job_id: jg9l3q8vg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:40:49Z' + timestamp: '2024-11-09T23:30:00Z' diff --git a/qai_hub_models/models/sinet/README.md b/qai_hub_models/models/sinet/README.md index eb12f3f7..f13fc9f1 100644 --- a/qai_hub_models/models/sinet/README.md +++ b/qai_hub_models/models/sinet/README.md @@ -5,8 +5,7 @@ SINet is a machine learning model that is designed to segment people from close-up portrait images in real time. -This is based on the implementation of SINet found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/sinet). diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 0ccd3a55..42c35313 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1752.0 - throughput: 570.7762557077625 + inference_time: 1760.0 + throughput: 568.1818181818181 estimated_peak_memory_range: - min: 16384 - max: 5505464 + min: 12288 + max: 12936240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jp14yljkp + job_id: jp14dj72p job_status: Passed torchscript_onnx_qnn: - inference_time: 1191.0 - throughput: 839.6305625524769 + inference_time: 1185.0 + throughput: 843.8818565400844 estimated_peak_memory_range: - min: 32768 - max: 5033680 + min: 634880 + max: 5875344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpy1rworp + job_id: jpy14xq7p job_status: Passed torchscript_onnx: inference_time: 2262.0 throughput: 442.0866489832007 estimated_peak_memory_range: - min: 282624 - max: 1904256 + min: 425984 + max: 75341904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgjvzmr7g + job_id: jgjv07oxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-26T23:20:41Z' + timestamp: '2024-11-09T22:10:53Z' - torchscript_onnx_tflite: - inference_time: 1759.0 - throughput: 568.5048322910744 + inference_time: 1138.0 + throughput: 878.7346221441124 estimated_peak_memory_range: - min: 32768 - max: 1546216 + min: 12288 + max: 32653312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jgdxe93kp + job_id: jgdxr38ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1192.0 - throughput: 838.9261744966443 + inference_time: 805.0 + throughput: 1242.2360248447205 estimated_peak_memory_range: - min: 2109440 - max: 7436120 + min: 618496 + max: 17022912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp0z3qm25 + job_id: jp0z1jd65 job_status: Passed torchscript_onnx: - inference_time: 2283.0 - throughput: 438.02014892685065 + inference_time: 1495.0 + throughput: 668.8963210702341 estimated_peak_memory_range: - min: 622592 - max: 2232120 + min: 0 + max: 37555360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jpede1775 + job_id: jpedrz815 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:20:42Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:10:54Z' - torchscript_onnx_tflite: - inference_time: 1156.0 - throughput: 865.0519031141869 + inference_time: 922.0 + throughput: 1084.5986984815618 estimated_peak_memory_range: - min: 16384 - max: 32763680 + min: 12288 + max: 22719488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j57y0w4q5 + job_id: 
j57yj4kl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 818.0 - throughput: 1222.4938875305625 + inference_time: 770.0 + throughput: 1298.7012987012988 estimated_peak_memory_range: - min: 618496 - max: 14972352 + min: 0 + max: 12501824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp8q09ezp + job_id: jp8q3x6xp job_status: Passed torchscript_onnx: - inference_time: 1549.0 - throughput: 645.577792123951 + inference_time: 1250.0 + throughput: 800.0 estimated_peak_memory_range: min: 0 - max: 37764160 + max: 26279312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgz3o9lz5 + job_id: jgz3xm8k5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:20:43Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:10:55Z' - torchscript_onnx_tflite: - inference_time: 1126.0 - throughput: 888.0994671403197 + inference_time: 1732.0 + throughput: 577.3672055427252 estimated_peak_memory_range: - min: 8192 - max: 22827312 + min: 20480 + max: 1627752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jp2k06rxp + job_id: jp4lx1mv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 732.0 - throughput: 1366.120218579235 + inference_time: 1159.0 + throughput: 862.8127696289905 estimated_peak_memory_range: - min: 0 - max: 12762800 + min: 671744 + max: 1881832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpv60ol75 - job_status: Passed - torchscript_onnx: - 
inference_time: 1522.0 - throughput: 657.030223390276 - estimated_peak_memory_range: - min: 0 - max: 26168848 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 229 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 229 - job_id: jg9lj1zqg + job_id: jgkel4o2g job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:20:45Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:10:45Z' - torchscript_onnx_tflite: - inference_time: 1731.0 - throughput: 577.7007510109763 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: - min: 24576 - max: 1398648 + min: 16384 + max: 1370360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jp4lko1q5 + job_id: jpxk74315 job_status: Passed torchscript_onnx_qnn: - inference_time: 1161.0 - throughput: 861.3264427217915 + inference_time: 1173.0 + throughput: 852.5149190110827 estimated_peak_memory_range: - min: 651264 - max: 1872024 + min: 638976 + max: 2104512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgke7n2yg + job_id: jglv0xo85 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:20:32Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:10:47Z' - torchscript_onnx_tflite: - inference_time: 1756.0 - throughput: 569.4760820045558 + inference_time: 1753.0 + throughput: 570.4506560182544 estimated_peak_memory_range: - min: 16384 - max: 13809272 + min: 20480 + max: 1458376 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j5q6r8m7p + job_id: j5mnwmowp job_status: Passed torchscript_onnx_qnn: - inference_time: 1179.0 - throughput: 848.1764206955047 + inference_time: 1172.0 + throughput: 853.2423208191126 estimated_peak_memory_range: - min: 622592 - max: 2284832 + min: 638976 + max: 1910120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp3j1kwxg + job_id: j56y37r0p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:15:27Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:10:48Z' - torchscript_onnx_tflite: - inference_time: 1765.0 - throughput: 566.57223796034 + inference_time: 1749.0 + throughput: 571.7552887364208 estimated_peak_memory_range: min: 12288 - max: 2943848 + max: 1470616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jglv2n1e5 + job_id: jgn69nor5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1168.0 - throughput: 856.1643835616438 + inference_time: 1166.0 + throughput: 857.6329331046312 estimated_peak_memory_range: min: 634880 - max: 1802392 + max: 2146648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgo2ny44p + job_id: jp3j49xlg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:15:28Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:10:49Z' - torchscript_onnx_tflite: - inference_time: 1750.0 - throughput: 571.4285714285714 + 
inference_time: 2404.0 + throughput: 415.97337770382694 estimated_peak_memory_range: - min: 24576 - max: 1731920 + min: 12288 + max: 21831872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j56yz6dvp + job_id: jprv40o9g job_status: Passed torchscript_onnx_qnn: - inference_time: 1175.0 - throughput: 851.063829787234 + inference_time: 2114.0 + throughput: 473.0368968779565 estimated_peak_memory_range: - min: 663552 - max: 2105424 + min: 0 + max: 5966336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpv6r3975 + job_id: jgo21roxp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:15:29Z' + chipset: SA8295P + timestamp: '2024-11-09T22:10:50Z' - torchscript_onnx_tflite: - inference_time: 1897.0 - throughput: 527.1481286241434 + inference_time: 1886.0 + throughput: 530.2226935312831 estimated_peak_memory_range: - min: 16384 - max: 34056320 + min: 12288 + max: 32229280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jprv8qlvg + job_id: jp2k7w44p job_status: Passed torchscript_onnx_qnn: - inference_time: 1330.0 - throughput: 751.8796992481203 + inference_time: 1316.0 + throughput: 759.8784194528876 estimated_peak_memory_range: min: 618496 - max: 18351824 + max: 17622112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgo2k0l4p + job_id: jpv61dej5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: 
'2024-10-26T23:20:38Z' + timestamp: '2024-11-09T22:10:52Z' - torchscript_onnx_qnn: - inference_time: 1386.0 - throughput: 721.5007215007215 + inference_time: 1345.0 + throughput: 743.4944237918215 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5q6ekl7p + job_id: j5q67yz4p job_status: Passed torchscript_onnx: - inference_time: 2372.0 - throughput: 421.5851602023609 + inference_time: 2360.0 + throughput: 423.728813559322 estimated_peak_memory_range: - min: 1794048 - max: 1794048 + min: 1847296 + max: 1847296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j5we2vlz5 + job_id: j5wed7165 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:20:44Z' + timestamp: '2024-11-09T22:10:56Z' diff --git a/qai_hub_models/models/squeezenet1_1/README.md b/qai_hub_models/models/squeezenet1_1/README.md index 71ce0fa4..8268bf62 100644 --- a/qai_hub_models/models/squeezenet1_1/README.md +++ b/qai_hub_models/models/squeezenet1_1/README.md @@ -5,8 +5,7 @@ SqueezeNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of SqueezeNet-1_1 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/squeezenet1_1). 
diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 3d5eaca5..944788c6 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,55 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 642.0 - throughput: 1557.632398753894 - estimated_peak_memory_range: - min: 12288 - max: 1341952 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 41 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 41 - job_id: jpede1y75 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 700.0 - throughput: 1428.5714285714287 - estimated_peak_memory_range: - min: 2125824 - max: 8236672 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 70 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 70 - job_id: j5mnq28yp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:19:58Z' - - torchscript_onnx_tflite: - inference_time: 641.0 - throughput: 1560.0624024960998 + inference_time: 643.0 + throughput: 1555.2099533437015 estimated_peak_memory_range: min: 12288 - max: 1963208 + max: 16020160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -91,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgz3o9nz5 + job_id: jpedrz415 job_status: Passed torchscript_onnx_qnn: inference_time: 709.0 throughput: 1410.4372355430182 estimated_peak_memory_range: - min: 28672 - max: 3133248 + min: 634880 + max: 11149128 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -106,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgn6lynv5 + job_id: j5mnwm4wp job_status: Passed torchscript_onnx: - inference_time: 656.0 - throughput: 1524.3902439024391 + inference_time: 666.0 + throughput: 1501.5015015015015 estimated_peak_memory_range: - min: 12288 - max: 3829328 + min: 589824 + max: 87936816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp3jv39xg + job_id: j56y37v0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -130,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:19:59Z' + timestamp: '2024-11-09T22:10:10Z' - torchscript_onnx_tflite: - inference_time: 460.0 - throughput: 2173.913043478261 + inference_time: 442.0 + throughput: 2262.443438914027 estimated_peak_memory_range: min: 16384 - max: 27299824 + max: 26781744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5we2v4z5 + job_id: jgz3xmvk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 513.0 - throughput: 1949.317738791423 + inference_time: 490.0 + throughput: 2040.8163265306123 estimated_peak_memory_range: - min: 618496 - max: 13310320 + min: 0 + max: 14279392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jprv8q0vg + job_id: jgn69nxr5 job_status: Passed torchscript_onnx: - inference_time: 511.0 - throughput: 1956.9471624266146 + inference_time: 504.0 + throughput: 1984.126984126984 estimated_peak_memory_range: min: 0 - max: 28449808 + max: 27286448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +138,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgo2k0r4p + job_id: jp3j498lg job_status: 
Passed reference_device_info: name: Samsung Galaxy S24 @@ -183,13 +147,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:20:00Z' + timestamp: '2024-11-09T22:10:11Z' - torchscript_onnx_tflite: - inference_time: 448.0 - throughput: 2232.1428571428573 + inference_time: 431.0 + throughput: 2320.185614849188 estimated_peak_memory_range: min: 8192 - max: 16300816 + max: 16696336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpxknjxj5 + job_id: j5wed7x65 job_status: Passed torchscript_onnx_qnn: - inference_time: 473.0 - throughput: 2114.164904862579 + inference_time: 502.0 + throughput: 1992.03187250996 estimated_peak_memory_range: min: 0 - max: 9195504 + max: 8920368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jglv6zxe5 + job_id: jprv4099g job_status: Passed torchscript_onnx: - inference_time: 518.0 - throughput: 1930.5019305019305 + inference_time: 513.0 + throughput: 1949.317738791423 estimated_peak_memory_range: min: 0 - max: 16924400 + max: 17181248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgjvzm77g + job_id: jgo21rmxp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -236,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:20:03Z' + timestamp: '2024-11-09T22:10:12Z' - torchscript_onnx_tflite: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: - min: 16384 - max: 2605168 + min: 12288 + max: 1233616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: 
jg9lj1dqg + job_id: jg9l3m8lg job_status: Passed torchscript_onnx_qnn: - inference_time: 644.0 - throughput: 1552.7950310559006 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 634880 - max: 1928272 + min: 638976 + max: 1858672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +229,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp2k06wxp + job_id: jp2k7wj4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -274,13 +238,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:19:50Z' + timestamp: '2024-11-09T22:10:03Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: - min: 28672 - max: 1879016 + min: 36864 + max: 1374440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp4ldqmq5 + job_id: jp14dj32p job_status: Passed torchscript_onnx_qnn: - inference_time: 650.0 - throughput: 1538.4615384615386 + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 2719744 - max: 3820552 + min: 651264 + max: 1853760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,7 +267,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgn6m27v5 + job_id: jp0z1jk65 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -312,13 +276,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:15:07Z' + timestamp: '2024-11-09T22:10:05Z' - torchscript_onnx_tflite: - inference_time: 641.0 - throughput: 1560.0624024960998 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 32768 - max: 72353096 + min: 16384 + max: 15084600 primary_compute_unit: NPU precision: fp16 layer_info: 
@@ -326,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpxk6v3j5 + job_id: jgdxr30ep job_status: Passed torchscript_onnx_qnn: - inference_time: 654.0 - throughput: 1529.051987767584 + inference_time: 649.0 + throughput: 1540.8320493066255 estimated_peak_memory_range: - min: 634880 - max: 1913424 + min: 626688 + max: 1899680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,7 +305,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jprv2knvg + job_id: jp8q3x8xp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -350,13 +314,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:15:08Z' + timestamp: '2024-11-09T22:10:06Z' - torchscript_onnx_tflite: - inference_time: 642.0 - throughput: 1557.632398753894 + inference_time: 641.0 + throughput: 1560.0624024960998 estimated_peak_memory_range: - min: 24576 - max: 1489544 + min: 12288 + max: 1546232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5mn6royp + job_id: j57yj46l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 644.0 - throughput: 1552.7950310559006 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: - min: 53248 - max: 1627456 + min: 634880 + max: 1937064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,7 +343,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp2k98vxp + job_id: jgkel4d2g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -388,13 +352,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:15:09Z' + timestamp: '2024-11-09T22:10:07Z' + - torchscript_onnx_tflite: + inference_time: 1227.0 + throughput: 814.9959250203749 + estimated_peak_memory_range: + min: 12288 + max: 15864160 + primary_compute_unit: NPU + precision: fp16 + 
layer_info: + layers_on_npu: 41 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 41 + job_id: jp4lx18v5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1281.0 + throughput: 780.64012490242 + estimated_peak_memory_range: + min: 0 + max: 5954448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 70 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 70 + job_id: j5q67yw4p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:10:08Z' - torchscript_onnx_tflite: - inference_time: 805.0 - throughput: 1242.2360248447205 + inference_time: 818.0 + throughput: 1222.4938875305625 estimated_peak_memory_range: min: 16384 - max: 28031920 + max: 28356800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -402,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp4lko3q5 + job_id: jpxk74m15 job_status: Passed torchscript_onnx_qnn: - inference_time: 876.0 - throughput: 1141.552511415525 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: min: 618496 - max: 15581152 + max: 15928960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j5q6eky7p + job_id: jglv0x785 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -426,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:19:56Z' + timestamp: '2024-11-09T22:10:09Z' - torchscript_onnx_qnn: - inference_time: 804.0 - throughput: 1243.7810945273632 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: min: 602112 max: 602112 @@ -440,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jpy1rwxrp + job_id: jpy14xn7p job_status: Passed torchscript_onnx: - 
inference_time: 698.0 - throughput: 1432.6647564469913 + inference_time: 694.0 + throughput: 1440.922190201729 estimated_peak_memory_range: - min: 2764800 - max: 2764800 + min: 2809856 + max: 2809856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpv60od75 + job_id: jpv61d4j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:20:01Z' + timestamp: '2024-11-09T22:10:13Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/README.md b/qai_hub_models/models/squeezenet1_1_quantized/README.md index 063e6f42..5b1aabf5 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/README.md +++ b/qai_hub_models/models/squeezenet1_1_quantized/README.md @@ -5,8 +5,7 @@ SqueezeNet is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of SqueezeNet-1_1Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/squeezenet1_1_quantized). 
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index a3df22db..41bcb727 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: SqueezeNet-1_1Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 202.0 - throughput: 4950.495049504951 + inference_time: 208.0 + throughput: 4807.692307692308 estimated_peak_memory_range: min: 12288 - max: 33032856 + max: 1452304 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,67 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jgjv2dr1g + job_id: j5q67d7mp job_status: Passed torchscript_onnx_qnn: inference_time: 466.0 throughput: 2145.922746781116 estimated_peak_memory_range: - min: 12288 - max: 72869864 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 71 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 71 - job_id: jprv26lkg - job_status: Passed - torchscript_onnx: - inference_time: 464.0 - throughput: 2155.1724137931033 - estimated_peak_memory_range: - min: 86016 - max: 1600216 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jgjv2d31g - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:39:36Z' - - torchscript_onnx_tflite: - inference_time: 198.0 - throughput: 5050.50505050505 - estimated_peak_memory_range: - min: 20480 - max: 2772080 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 43 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 43 - job_id: jpedwo785 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 471.0 - throughput: 2123.1422505307855 - 
estimated_peak_memory_range: - min: 172032 - max: 3068712 + min: 176128 + max: 3121496 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp2k9xr6p - job_status: Passed - torchscript_onnx: - inference_time: 480.0 - throughput: 2083.3333333333335 - estimated_peak_memory_range: - min: 90112 - max: 1529992 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jpedwo685 + job_id: jgdxrmrlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:39:38Z' + timestamp: '2024-11-09T23:28:47Z' - torchscript_onnx_tflite: - inference_time: 152.0 - throughput: 6578.9473684210525 + inference_time: 151.0 + throughput: 6622.516556291391 estimated_peak_memory_range: - min: 16384 - max: 26980160 + min: 12288 + max: 27088736 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jgz3j2l45 + job_id: jglv0q0l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 345.0 - throughput: 2898.550724637681 + inference_time: 340.0 + throughput: 2941.176470588235 estimated_peak_memory_range: - min: 188416 - max: 12843296 + min: 0 + max: 13787632 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpy1jzo0p - job_status: Passed - torchscript_onnx: - inference_time: 412.0 - throughput: 2427.1844660194174 - estimated_peak_memory_range: - min: 12288 - max: 30644992 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jgz3j2z45 + job_id: j57yj8vr5 job_status: Passed reference_device_info: name: Samsung Galaxy 
S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:39:40Z' + timestamp: '2024-11-09T23:28:48Z' - torchscript_onnx_tflite: - inference_time: 125.0 - throughput: 8000.0 + inference_time: 152.0 + throughput: 6578.9473684210525 estimated_peak_memory_range: min: 8192 - max: 16473184 + max: 16314528 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j5mn6e07p + job_id: j56y3037p job_status: Passed torchscript_onnx_qnn: - inference_time: 363.0 - throughput: 2754.8209366391184 + inference_time: 284.0 + throughput: 3521.1267605633802 estimated_peak_memory_range: - min: 28672 - max: 9120512 + min: 12288 + max: 9725664 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgo2nzd1p - job_status: Passed - torchscript_onnx: - inference_time: 391.0 - throughput: 2557.544757033248 - estimated_peak_memory_range: - min: 0 - max: 18980832 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 47 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 47 - job_id: jg9ly0omg + job_id: jp4lx2jl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:39:43Z' + timestamp: '2024-11-09T23:28:50Z' - torchscript_onnx_tflite: - inference_time: 490.0 - throughput: 2040.8163265306123 + inference_time: 482.0 + throughput: 2074.688796680498 estimated_peak_memory_range: min: 12288 - max: 16843808 + max: 17473232 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j5we3wl45 + job_id: jp3j4r4zg job_status: Passed torchscript_onnx_qnn: - inference_time: 996.0 - throughput: 1004.0160642570281 
+ inference_time: 979.0 + throughput: 1021.4504596527069 estimated_peak_memory_range: - min: 12288 - max: 7626368 + min: 36864 + max: 7943584 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp0z24o05 + job_id: jpxk7ze95 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:39:19Z' + timestamp: '2024-11-09T23:28:32Z' - torchscript_onnx_tflite: - inference_time: 4218.0 - throughput: 237.0791844476055 + inference_time: 4151.0 + throughput: 240.9058058299205 estimated_peak_memory_range: - min: 49152 - max: 2884032 + min: 16384 + max: 4801600 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jg9ly0zmg + job_id: jgo2191dp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:39:00Z' + timestamp: '2024-11-09T23:28:15Z' - torchscript_onnx_tflite: - inference_time: 204.0 - throughput: 4901.9607843137255 + inference_time: 206.0 + throughput: 4854.368932038835 estimated_peak_memory_range: min: 12288 - max: 34374312 + max: 1317264 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jp14w2nnp + job_id: jpv61n1m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 428.0 - throughput: 2336.448598130841 + inference_time: 429.0 + throughput: 2331.002331002331 estimated_peak_memory_range: min: 180224 - max: 1455800 + max: 1395632 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp8qm2jqp + job_id: j5mnwlvqp job_status: Passed reference_device_info: 
name: QCS8550 (Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:39:21Z' + timestamp: '2024-11-09T23:28:34Z' - torchscript_onnx_tflite: - inference_time: 193.0 - throughput: 5181.347150259067 + inference_time: 203.0 + throughput: 4926.108374384236 estimated_peak_memory_range: - min: 12288 - max: 1316712 + min: 16384 + max: 72274456 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j57ylwql5 + job_id: jgjv0808g job_status: Passed torchscript_onnx_qnn: inference_time: 428.0 throughput: 2336.448598130841 estimated_peak_memory_range: min: 184320 - max: 1490448 + max: 1714096 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgn6myjr5 + job_id: jprv471eg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:02:18Z' + timestamp: '2024-11-09T23:28:38Z' - torchscript_onnx_tflite: - inference_time: 204.0 - throughput: 4901.9607843137255 + inference_time: 203.0 + throughput: 4926.108374384236 estimated_peak_memory_range: - min: 45056 - max: 25579672 + min: 36864 + max: 1416472 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jp4ldozv5 + job_id: jgz3x0x65 job_status: Passed torchscript_onnx_qnn: - inference_time: 429.0 - throughput: 2331.002331002331 + inference_time: 427.0 + throughput: 2341.92037470726 estimated_peak_memory_range: - min: 184320 - max: 1528504 + min: 221184 + max: 1497584 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jprv2qz9g + job_id: jp2k7z3mp job_status: Passed 
reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:02:20Z' + timestamp: '2024-11-09T23:28:40Z' - torchscript_onnx_tflite: inference_time: 204.0 throughput: 4901.9607843137255 estimated_peak_memory_range: min: 12288 - max: 2658664 + max: 1339232 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j5mn62jwp + job_id: j5wedrdj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 431.0 - throughput: 2320.185614849188 + inference_time: 423.0 + throughput: 2364.066193853428 estimated_peak_memory_range: - min: 12288 - max: 1319720 + min: 200704 + max: 1475928 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpy1jw97p + job_id: jpy14yv4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:02:23Z' + timestamp: '2024-11-09T23:28:41Z' - torchscript_onnx_tflite: - inference_time: 514.0 - throughput: 1945.5252918287938 + inference_time: 533.0 + throughput: 1876.172607879925 estimated_peak_memory_range: min: 12288 - max: 15423008 + max: 15626560 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jpxk6jw15 + job_id: jg9l3q3vg job_status: Passed torchscript_onnx_qnn: - inference_time: 900.0 - throughput: 1111.111111111111 + inference_time: 893.0 + throughput: 1119.8208286674133 estimated_peak_memory_range: - min: 0 - max: 5964912 + min: 163840 + max: 6089136 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp2k9624p + job_id: jp8q3kw8p job_status: Passed 
reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:02:21Z' + timestamp: '2024-11-09T23:28:43Z' - torchscript_onnx_tflite: - inference_time: 240.0 - throughput: 4166.666666666667 + inference_time: 239.0 + throughput: 4184.100418410042 estimated_peak_memory_range: - min: 12288 - max: 28310704 + min: 16384 + max: 26341184 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jpxk69l85 + job_id: jp14dmdlp job_status: Passed torchscript_onnx_qnn: - inference_time: 525.0 - throughput: 1904.7619047619048 + inference_time: 512.0 + throughput: 1953.125 estimated_peak_memory_range: - min: 163840 - max: 15142160 + min: 167936 + max: 13921376 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp3j1nomg + job_id: jgkelkrog job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:39:31Z' + timestamp: '2024-11-09T23:28:45Z' - torchscript_onnx_qnn: - inference_time: 536.0 - throughput: 1865.6716417910447 + inference_time: 570.0 + throughput: 1754.3859649122808 estimated_peak_memory_range: - min: 573440 - max: 573440 + min: 577536 + max: 577536 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgkeqv6vg + job_id: jgn69wrm5 job_status: Passed torchscript_onnx: - inference_time: 493.0 - throughput: 2028.3975659229209 + inference_time: 43615.0 + throughput: 22.927891780350798 estimated_peak_memory_range: - min: 1896448 - max: 1896448 + min: 17670144 + max: 17670144 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 47 + layers_on_npu: 151 layers_on_gpu: 0 - 
layers_on_cpu: 0 - total_layers: 47 - job_id: j5we3wy45 + layers_on_cpu: 26 + total_layers: 177 + job_id: jp3j4rqzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +488,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:39:41Z' + timestamp: '2024-11-09T23:28:52Z' diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md index 64ea5be3..eae8ac60 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md @@ -5,8 +5,7 @@ Generates high resolution images from text prompts using a latent diffusion model. This model uses CLIP ViT-L/14 as text encoder, U-Net based latent denoising, and VAE based decoder to generate the final image. -This is based on the implementation of Stable-Diffusion-v1.5 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/stable_diffusion_v1_5_quantized). 
diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/requirements.txt b/qai_hub_models/models/stable_diffusion_v1_5_quantized/requirements.txt index 30d9a5e4..047c5390 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/requirements.txt +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -diffusers[torch]==0.21.4 +diffusers[torch]==0.31.0 diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md index 8bb23e47..295e96d6 100644 --- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md +++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md @@ -5,8 +5,7 @@ Generates high resolution images from text prompts using a latent diffusion model. This model uses CLIP ViT-L/14 as text encoder, U-Net based latent denoising, and VAE based decoder to generate the final image. -This is based on the implementation of Stable-Diffusion-v2.1 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/stable_diffusion_v2_1_quantized). 
diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/requirements.txt b/qai_hub_models/models/stable_diffusion_v2_1_quantized/requirements.txt index 30d9a5e4..047c5390 100644 --- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/requirements.txt +++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -diffusers[torch]==0.21.4 +diffusers[torch]==0.31.0 diff --git a/qai_hub_models/models/swin_base/README.md b/qai_hub_models/models/swin_base/README.md index bd54515a..d29d3bd1 100644 --- a/qai_hub_models/models/swin_base/README.md +++ b/qai_hub_models/models/swin_base/README.md @@ -5,8 +5,7 @@ SwinBase is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Swin-Base found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_base). 
diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index e6f3eb06..00ca9a00 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 25383.0 - throughput: 39.396446440531065 + inference_time: 25772.0 + throughput: 38.80180040353872 estimated_peak_memory_range: - min: 16384 - max: 3776544 + min: 53248 + max: 3758608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jpede1l85 + job_id: j56y3730p job_status: Passed torchscript_onnx_qnn: - inference_time: 28383.0 - throughput: 35.23235739703343 + inference_time: 28719.0 + throughput: 34.82015390508026 estimated_peak_memory_range: - min: 36864 - max: 51164712 + min: 16384 + max: 49785288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jgdxe9zkp + job_id: jp14djq2p job_status: Passed torchscript_onnx: - inference_time: 46863.0 - throughput: 21.338796065126004 + inference_time: 46518.0 + throughput: 21.497054903478222 estimated_peak_memory_range: - min: 77824 - max: 237057760 + min: 102400 + max: 237340008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: jp8q091zp + job_id: jp0z1je65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:18:40Z' + 
timestamp: '2024-11-09T22:06:10Z' - torchscript_onnx_tflite: - inference_time: 25447.0 - throughput: 39.29736314693284 + inference_time: 17643.0 + throughput: 56.67970299835629 estimated_peak_memory_range: - min: 24576 - max: 3326520 + min: 36864 + max: 598061216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jgz3o9445 + job_id: jp3j494lg job_status: Passed torchscript_onnx_qnn: - inference_time: 28893.0 - throughput: 34.61045928079466 + inference_time: 20020.0 + throughput: 49.95004995004995 estimated_peak_memory_range: - min: 12288 - max: 44240384 + min: 0 + max: 208089104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: j57y0w7q5 + job_id: jgdxr37ep job_status: Passed torchscript_onnx: - inference_time: 46526.0 - throughput: 21.49335855220737 + inference_time: 32430.0 + throughput: 30.835646006783843 estimated_peak_memory_range: - min: 81920 - max: 3149645944 + min: 729088 + max: 874304144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: jgke7n8yg + job_id: jp8q3xwxp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:18:41Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:06:11Z' - torchscript_onnx_tflite: - inference_time: 18145.0 - throughput: 55.11160099200882 + inference_time: 16394.0 + throughput: 60.9979260705136 estimated_peak_memory_range: - min: 40960 - max: 598051408 + min: 36864 + max: 280607200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j5we2v145 + job_id: jgo21r1xp 
job_status: Passed torchscript_onnx_qnn: - inference_time: 20055.0 - throughput: 49.86287708800798 + inference_time: 17094.0 + throughput: 58.5000585000585 estimated_peak_memory_range: - min: 106496 - max: 208439024 + min: 0 + max: 217506784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp4lko9q5 + job_id: j57yj4vl5 job_status: Passed torchscript_onnx: - inference_time: 33512.0 - throughput: 29.840057292910004 + inference_time: 29188.0 + throughput: 34.26065506372482 estimated_peak_memory_range: - min: 0 - max: 871108896 + min: 638976 + max: 345588320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: j5q6ekv7p + job_id: jgkel4r2g job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:18:42Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:06:12Z' - torchscript_onnx_tflite: - inference_time: 16332.0 - throughput: 61.22948812147931 + inference_time: 25498.0 + throughput: 39.218762255863204 estimated_peak_memory_range: - min: 36864 - max: 282648256 + min: 0 + max: 2943760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jp14ylvkp + job_id: jpv61d1j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14655.0 - throughput: 68.23609689525759 + inference_time: 26715.0 + throughput: 37.43215422047539 estimated_peak_memory_range: - min: 614400 - max: 211157904 + min: 684032 + max: 1866432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp0z3q625 - job_status: 
Passed - torchscript_onnx: - inference_time: 25674.0 - throughput: 38.94991041520605 - estimated_peak_memory_range: - min: 638976 - max: 345839280 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1150 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1150 - job_id: j56yejwvp + job_id: jp4lx1jv5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:18:44Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:06:03Z' - torchscript_onnx_tflite: - inference_time: 25340.0 - throughput: 39.46329913180742 + inference_time: 25545.0 + throughput: 39.146604032100214 estimated_peak_memory_range: - min: 61440 - max: 2950080 + min: 16384 + max: 5637312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jg9lj1xmg + job_id: jgjv070xg job_status: Passed torchscript_onnx_qnn: - inference_time: 26536.0 - throughput: 37.68465480856195 + inference_time: 27141.0 + throughput: 36.844626211267084 estimated_peak_memory_range: - min: 729088 - max: 1919296 + min: 364544 + max: 1663032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jpxknjdj5 + job_id: j5mnwmvwp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:18:32Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:06:05Z' - torchscript_onnx_tflite: - inference_time: 25248.0 - throughput: 39.60709759188847 + inference_time: 25447.0 + throughput: 39.29736314693284 estimated_peak_memory_range: - min: 45056 - max: 2995936 + 
min: 98304 + max: 3611664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j5q6r8z7p + job_id: jpedrzr15 job_status: Passed torchscript_onnx_qnn: - inference_time: 27209.0 - throughput: 36.75254511374913 + inference_time: 27434.0 + throughput: 36.451119049354816 estimated_peak_memory_range: - min: 716800 - max: 2328384 + min: 53248 + max: 1310448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp3j1kxxg + job_id: jgn69nrr5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:11:32Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:06:06Z' - torchscript_onnx_tflite: - inference_time: 25483.0 - throughput: 39.24184750618059 + inference_time: 25603.0 + throughput: 39.0579228996602 estimated_peak_memory_range: - min: 53248 - max: 3577248 + min: 126976 + max: 4209104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jglv2noe5 + job_id: jgz3xmxk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 27120.0 - throughput: 36.87315634218289 + inference_time: 27181.0 + throughput: 36.79040506235974 estimated_peak_memory_range: - min: 716800 - max: 1828696 + min: 479232 + max: 1798080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jgo2nyo4p + job_id: jprv4019g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:11:33Z' + chipset: SA8650P Proxy + timestamp: 
'2024-11-09T22:06:07Z' - torchscript_onnx_tflite: - inference_time: 25474.0 - throughput: 39.25571170605323 + inference_time: 36059.0 + throughput: 27.732327574253308 estimated_peak_memory_range: - min: 184320 - max: 4548696 + min: 49152 + max: 253410448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j56yz6rvp + job_id: j5wed7m65 job_status: Passed torchscript_onnx_qnn: - inference_time: 27119.0 - throughput: 36.874516021977215 + inference_time: 38003.0 + throughput: 26.31371207536247 estimated_peak_memory_range: - min: 675840 - max: 2009768 + min: 696320 + max: 6415488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jpv6r3e75 + job_id: jp2k7w34p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:11:34Z' + chipset: SA8295P + timestamp: '2024-11-09T22:06:08Z' - torchscript_onnx_tflite: - inference_time: 32542.0 - throughput: 30.729518775735972 + inference_time: 32521.0 + throughput: 30.749361950739523 estimated_peak_memory_range: - min: 98304 - max: 567844896 + min: 77824 + max: 565418048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jg9lj1xqg + job_id: jg9l3m9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 35914.0 - throughput: 27.844294704015148 + inference_time: 35869.0 + throughput: 27.879227187822355 estimated_peak_memory_range: - min: 696320 - max: 199067392 + min: 471040 + max: 197758400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jpy1rwmrp + job_id: jpy14xv7p job_status: Passed 
reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:18:38Z' + timestamp: '2024-11-09T22:06:09Z' - torchscript_onnx_qnn: - inference_time: 27679.0 - throughput: 36.128472849452656 + inference_time: 27678.0 + throughput: 36.12977816316208 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: j5mnq2dyp + job_id: jpxk74e15 job_status: Passed torchscript_onnx: - inference_time: 51876.0 - throughput: 19.276736833988743 + inference_time: 52000.0 + throughput: 19.23076923076923 estimated_peak_memory_range: - min: 207192064 - max: 207192064 + min: 207130624 + max: 207130624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: jglv6zle5 + job_id: j5q67y94p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:18:43Z' + timestamp: '2024-11-09T22:06:13Z' diff --git a/qai_hub_models/models/swin_small/README.md b/qai_hub_models/models/swin_small/README.md index e5ba3f12..a93b950c 100644 --- a/qai_hub_models/models/swin_small/README.md +++ b/qai_hub_models/models/swin_small/README.md @@ -5,8 +5,7 @@ SwinSmall is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Swin-Small found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_small). diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 3baa1fc1..9f9db52a 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 18761.0 - throughput: 53.30206278982997 + inference_time: 18792.0 + throughput: 53.21413367390379 estimated_peak_memory_range: - min: 53248 - max: 3310560 + min: 65536 + max: 2814160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp3jv3xmg + job_id: j56y3717p job_status: Passed torchscript_onnx_qnn: - inference_time: 21279.0 - throughput: 46.99468960007519 + inference_time: 21109.0 + throughput: 47.373158368468424 estimated_peak_memory_range: - min: 49152 - max: 39649048 + min: 16384 + max: 41474192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jgdxe986p + job_id: jp14dj8lp job_status: Passed torchscript_onnx: - inference_time: 34387.0 - throughput: 29.080757262919125 + inference_time: 34188.0 + throughput: 29.25002925002925 estimated_peak_memory_range: - min: 94208 - max: 136522960 + min: 98304 + max: 135866296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jp8q094qp + job_id: jgn69n9r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 
+94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:17:47Z' + timestamp: '2024-11-09T22:05:16Z' - torchscript_onnx_tflite: - inference_time: 18666.0 - throughput: 53.57334190506804 + inference_time: 12781.0 + throughput: 78.24113919098662 estimated_peak_memory_range: - min: 40960 - max: 2683272 + min: 45056 + max: 554034448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jgo2k0o1p + job_id: jp3j49mzg job_status: Passed torchscript_onnx_qnn: - inference_time: 21328.0 - throughput: 46.8867216804201 + inference_time: 14548.0 + throughput: 68.73797085510036 estimated_peak_memory_range: - min: 45056 - max: 42344560 + min: 0 + max: 168540080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: j57y0wkn5 + job_id: jgdxr3vlp job_status: Passed torchscript_onnx: - inference_time: 34829.0 - throughput: 28.711705762439347 + inference_time: 23630.0 + throughput: 42.319085907744395 estimated_peak_memory_range: - min: 81920 - max: 137030192 + min: 651264 + max: 824856144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jgke7n9vg + job_id: jprv4049g job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:17:48Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:05:17Z' - torchscript_onnx_tflite: - inference_time: 12939.0 - throughput: 77.28572532653219 + inference_time: 9771.0 + throughput: 102.34367004400778 estimated_peak_memory_range: - min: 24576 - max: 552822400 + min: 12288 + max: 242381760 primary_compute_unit: NPU precision: fp16 layer_info: 
@@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jpv60oez5 + job_id: jgo21rvdp job_status: Passed torchscript_onnx_qnn: - inference_time: 14349.0 - throughput: 69.69126768415917 + inference_time: 12580.0 + throughput: 79.4912559618442 estimated_peak_memory_range: - min: 0 - max: 166450544 + min: 614400 + max: 168501360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jp4lkom25 + job_id: j5wed7d65 job_status: Passed torchscript_onnx: - inference_time: 28090.0 - throughput: 35.5998576005696 + inference_time: 20440.0 + throughput: 48.923679060665364 estimated_peak_memory_range: min: 0 - max: 822678592 + max: 333464224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: j5q6ekmep + job_id: jp2k7w74p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:17:49Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:05:18Z' - torchscript_onnx_tflite: - inference_time: 11738.0 - throughput: 85.19338899301414 + inference_time: 18668.0 + throughput: 53.56760231412042 estimated_peak_memory_range: - min: 32768 - max: 247040800 + min: 61440 + max: 2904320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp14yl7np + job_id: jpv61dwm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12765.0 - throughput: 78.33920877399139 + inference_time: 20113.0 + throughput: 49.719087157559784 estimated_peak_memory_range: - min: 614400 - max: 168486384 + min: 679936 + max: 1778936 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jp0z3qv05 - job_status: Passed - torchscript_onnx: - inference_time: 20305.0 - throughput: 49.24895345973898 - estimated_peak_memory_range: - min: 0 - max: 327540464 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1145 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1145 - job_id: j56yejdnp + job_id: jg9l3m3lg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:17:51Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:05:09Z' - torchscript_onnx_tflite: - inference_time: 18767.0 - throughput: 53.28502158043374 + inference_time: 18669.0 + throughput: 53.564732979806095 estimated_peak_memory_range: - min: 53248 - max: 3499488 + min: 49152 + max: 3199408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jgjvzmo1g + job_id: jgjv07l8g job_status: Passed torchscript_onnx_qnn: - inference_time: 20254.0 - throughput: 49.372963365261185 + inference_time: 20593.0 + throughput: 48.5601903559462 estimated_peak_memory_range: - min: 716800 - max: 1864712 + min: 675840 + max: 2042112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jpxknj385 + job_id: jgdxr3rep job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:17:39Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:05:11Z' - torchscript_onnx_tflite: - inference_time: 18648.0 - throughput: 53.625053625053624 + 
inference_time: 18712.0 + throughput: 53.44164172723386 estimated_peak_memory_range: - min: 57344 - max: 2978216 + min: 49152 + max: 2853160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp4ldq8q5 + job_id: jpedrzv05 job_status: Passed torchscript_onnx_qnn: - inference_time: 20619.0 - throughput: 48.49895727241864 + inference_time: 20645.0 + throughput: 48.437878420925166 estimated_peak_memory_range: - min: 679936 - max: 1856728 + min: 675840 + max: 2039536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jgn6m2ov5 + job_id: j57yj4jl5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:11:01Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:05:12Z' - torchscript_onnx_tflite: - inference_time: 18700.0 - throughput: 53.475935828877006 + inference_time: 18756.0 + throughput: 53.31627212625293 estimated_peak_memory_range: - min: 45056 - max: 2848088 + min: 77824 + max: 3161872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jpxk6vmj5 + job_id: jgz3xm765 job_status: Passed torchscript_onnx_qnn: - inference_time: 20680.0 - throughput: 48.355899419729205 + inference_time: 20743.0 + throughput: 48.20903437304151 estimated_peak_memory_range: - min: 712704 - max: 1842592 + min: 704512 + max: 2347192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jprv2kovg + job_id: jp4lx1xv5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm 
- chipset: SA8775P Proxy - timestamp: '2024-10-30T01:11:02Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:05:13Z' - torchscript_onnx_tflite: - inference_time: 18883.0 - throughput: 52.95768680824022 + inference_time: 26993.0 + throughput: 37.046641721927905 estimated_peak_memory_range: - min: 36864 - max: 3415360 + min: 20480 + max: 225476160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: j5mn6r4yp + job_id: j5wed79j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 20539.0 - throughput: 48.68786211597449 + inference_time: 28889.0 + throughput: 34.615251479802 estimated_peak_memory_range: - min: 684032 - max: 2181816 + min: 671744 + max: 6144272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jp2k984xp + job_id: jpxk74715 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:11:03Z' + chipset: SA8295P + timestamp: '2024-11-09T22:05:14Z' - torchscript_onnx_tflite: - inference_time: 24362.0 - throughput: 41.0475330432641 + inference_time: 24173.0 + throughput: 41.36846895296405 estimated_peak_memory_range: - min: 0 - max: 536157248 + min: 20480 + max: 534274096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jg9lj1kmg + job_id: jg9l3m4vg job_status: Passed torchscript_onnx_qnn: - inference_time: 26132.0 - throughput: 38.26725853359865 + inference_time: 25947.0 + throughput: 38.540100975064554 estimated_peak_memory_range: - min: 634880 - max: 160254752 + min: 614400 + max: 163445904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 1255 - job_id: jpy1rwq0p + job_id: j5mnwmwwp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:17:45Z' + timestamp: '2024-11-09T22:05:15Z' - torchscript_onnx_qnn: - inference_time: 21158.0 - throughput: 47.26344645051517 + inference_time: 21225.0 + throughput: 47.11425206124853 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: j5mnq2o7p + job_id: jp14djd2p job_status: Passed torchscript_onnx: - inference_time: 38008.0 - throughput: 26.31025047358451 + inference_time: 37962.0 + throughput: 26.3421316052895 estimated_peak_memory_range: - min: 123580416 - max: 123580416 + min: 123555840 + max: 123555840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jglv6z125 + job_id: jpy14x47p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:17:50Z' + timestamp: '2024-11-09T22:05:20Z' diff --git a/qai_hub_models/models/swin_tiny/README.md b/qai_hub_models/models/swin_tiny/README.md index 33acf7ff..d7387050 100644 --- a/qai_hub_models/models/swin_tiny/README.md +++ b/qai_hub_models/models/swin_tiny/README.md @@ -5,8 +5,7 @@ SwinTiny is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of Swin-Tiny found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/swin_tiny). diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index e6dece66..309a8e56 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 11798.0 - throughput: 84.76012883539583 + inference_time: 11870.0 + throughput: 84.24599831508003 estimated_peak_memory_range: - min: 24576 - max: 2672104 + min: 32768 + max: 2552528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jp8q098qp + job_id: jp8q3xz8p job_status: Passed torchscript_onnx_qnn: - inference_time: 13157.0 - throughput: 76.00516835144789 + inference_time: 13104.0 + throughput: 76.31257631257631 estimated_peak_memory_range: - min: 180224 - max: 28733480 + min: 32768 + max: 23875552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jpede1285 + job_id: jpedrzk05 job_status: Passed torchscript_onnx: - inference_time: 19933.0 - throughput: 50.16806301108714 + inference_time: 19834.0 + throughput: 50.418473328627606 estimated_peak_memory_range: - min: 57344 - max: 69199296 + min: 45056 + max: 69216608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jgn6lyxj5 + job_id: jgn69nem5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ 
models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:16:59Z' + timestamp: '2024-11-09T22:04:26Z' - torchscript_onnx_tflite: - inference_time: 11921.0 - throughput: 83.88558006878617 + inference_time: 8136.0 + throughput: 122.91052114060963 estimated_peak_memory_range: - min: 45056 - max: 2950184 + min: 36864 + max: 347801408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jgke7ndvg + job_id: jgkel43og job_status: Passed torchscript_onnx_qnn: - inference_time: 13202.0 - throughput: 75.74609907589759 + inference_time: 8917.0 + throughput: 112.145340361108 estimated_peak_memory_range: - min: 53248 - max: 23950312 + min: 618496 + max: 117444448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jgz3o9w45 + job_id: jgz3xmr65 job_status: Passed torchscript_onnx: - inference_time: 19920.0 - throughput: 50.200803212851405 + inference_time: 13776.0 + throughput: 72.59001161440186 estimated_peak_memory_range: - min: 53248 - max: 69077280 + min: 0 + max: 486776336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jprv8q9kg + job_id: jprv40yeg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:17:00Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:04:27Z' - torchscript_onnx_tflite: - inference_time: 9813.0 - throughput: 101.9056353816366 + inference_time: 7319.0 + throughput: 136.6306872523569 estimated_peak_memory_range: - min: 40960 - max: 345255104 + min: 24576 + max: 163784848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 
+161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: j5q6ekwep + job_id: j5q67y3mp job_status: Passed torchscript_onnx_qnn: - inference_time: 8857.0 - throughput: 112.90504685559445 + inference_time: 7661.0 + throughput: 130.53126223730584 estimated_peak_memory_range: - min: 618496 - max: 113769712 + min: 614400 + max: 112336816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j5we2vx45 + job_id: j5wed7qj5 job_status: Passed torchscript_onnx: - inference_time: 14020.0 - throughput: 71.32667617689016 + inference_time: 11990.0 + throughput: 83.40283569641367 estimated_peak_memory_range: min: 0 - max: 484010496 + max: 221925680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jp2k06j6p + job_id: jp2k7wmmp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:17:01Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:04:28Z' - torchscript_onnx_tflite: - inference_time: 7340.0 - throughput: 136.23978201634878 + inference_time: 11795.0 + throughput: 84.7816871555744 estimated_peak_memory_range: - min: 28672 - max: 164181904 + min: 40960 + max: 682329016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jgjvzm11g + job_id: jglv0x3l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7615.0 - throughput: 131.31976362442546 + inference_time: 12321.0 + throughput: 81.16224332440548 estimated_peak_memory_range: - min: 614400 - max: 111722384 + min: 647168 + max: 2018032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 
+229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j5mnq247p - job_status: Passed - torchscript_onnx: - inference_time: 12019.0 - throughput: 83.20159747067143 - estimated_peak_memory_range: - min: 45056 - max: 226668656 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 623 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 623 - job_id: jp0z3qd05 + job_id: jg9l3mwvg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:17:03Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:04:18Z' - torchscript_onnx_tflite: - inference_time: 11793.0 - throughput: 84.79606546256254 + inference_time: 11805.0 + throughput: 84.70986869970352 estimated_peak_memory_range: - min: 45056 - max: 683245064 + min: 36864 + max: 668632080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jglv6z725 + job_id: j56y37n7p job_status: Passed torchscript_onnx_qnn: - inference_time: 12281.0 - throughput: 81.4265939255761 + inference_time: 12289.0 + throughput: 81.37358613394092 estimated_peak_memory_range: - min: 634880 - max: 1804528 + min: 663552 + max: 1848216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jg9lj18mg + job_id: jgdxr3olp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:16:51Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:04:20Z' - torchscript_onnx_tflite: - inference_time: 11874.0 - throughput: 84.21761832575375 + inference_time: 
11895.0 + throughput: 84.06893652795293 estimated_peak_memory_range: - min: 32768 - max: 3028160 + min: 24576 + max: 2773944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jgz3jew45 + job_id: jp3j49ezg job_status: Passed torchscript_onnx_qnn: - inference_time: 12368.0 - throughput: 80.85381630012937 + inference_time: 12362.0 + throughput: 80.89305937550559 estimated_peak_memory_range: - min: 675840 - max: 2006864 + min: 659456 + max: 1925960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jp14w03np + job_id: j57yj4dr5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:10:33Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:04:22Z' - torchscript_onnx_tflite: - inference_time: 11859.0 - throughput: 84.32414200185514 + inference_time: 11845.0 + throughput: 84.42380751371887 estimated_peak_memory_range: - min: 24576 - max: 2258496 + min: 221184 + max: 3292784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: j5we3ox45 + job_id: jgo21r3dp job_status: Passed torchscript_onnx_qnn: - inference_time: 12328.0 - throughput: 81.11615833874107 + inference_time: 12431.0 + throughput: 80.44405116241654 estimated_peak_memory_range: - min: 659456 - max: 2217720 + min: 675840 + max: 2329864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jgdxqw06p + job_id: jp4lx1wl5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P 
Proxy - timestamp: '2024-10-30T01:10:34Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:04:23Z' - torchscript_onnx_tflite: - inference_time: 11828.0 - throughput: 84.54514710855597 + inference_time: 17096.0 + throughput: 58.493214787084696 estimated_peak_memory_range: - min: 20480 - max: 444165616 + min: 36864 + max: 154017632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jg9lyv8mg + job_id: jpv61dvm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12309.0 - throughput: 81.24136810463888 + inference_time: 17321.0 + throughput: 57.73338721782807 estimated_peak_memory_range: min: 655360 - max: 2368872 + max: 6479648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j5we3oxz5 + job_id: jpxk74195 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:10:35Z' + chipset: SA8295P + timestamp: '2024-11-09T22:04:24Z' - torchscript_onnx_tflite: - inference_time: 15083.0 - throughput: 66.29980773055757 + inference_time: 15100.0 + throughput: 66.2251655629139 estimated_peak_memory_range: - min: 49152 - max: 334829328 + min: 16384 + max: 333598224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jpv60o4z5 + job_id: jgjv07e8g job_status: Passed torchscript_onnx_qnn: - inference_time: 16508.0 - throughput: 60.5766900896535 + inference_time: 16693.0 + throughput: 59.905349547714614 estimated_peak_memory_range: - min: 0 - max: 104168048 + min: 679936 + max: 107614448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: 
jpxknjm85 + job_id: j5mnwmzqp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:16:56Z' + timestamp: '2024-11-09T22:04:25Z' - torchscript_onnx_qnn: - inference_time: 12937.0 - throughput: 77.29767334003246 + inference_time: 12932.0 + throughput: 77.32755954222085 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jp14yl3np + job_id: jp14djelp job_status: Passed torchscript_onnx: - inference_time: 22098.0 - throughput: 45.25296406914653 + inference_time: 22131.0 + throughput: 45.18548642176133 estimated_peak_memory_range: - min: 68640768 - max: 68640768 + min: 67493888 + max: 67493888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jpy1rwn0p + job_id: jpy14xd4p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:17:02Z' + timestamp: '2024-11-09T22:04:29Z' diff --git a/qai_hub_models/models/trocr/README.md b/qai_hub_models/models/trocr/README.md index 49968d83..2d941f74 100644 --- a/qai_hub_models/models/trocr/README.md +++ b/qai_hub_models/models/trocr/README.md @@ -5,8 +5,7 @@ End-to-end text recognition approach with pre-trained image transformer and text transformer models for both image understanding and wordpiece-level text generation. -This is based on the implementation of TrOCR found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/trocr). diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index 1e5556ef..d13c1ff9 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: TrOCREncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 50879.0 - throughput: 19.65447434108375 + inference_time: 50570.0 + throughput: 19.774569903104606 estimated_peak_memory_range: - min: 7196672 - max: 9589904 + min: 7180288 + max: 9726120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jprv8qy7g + job_id: jp4lx1kl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 52066.0 - throughput: 19.206391887220068 + inference_time: 51644.0 + throughput: 19.363333591511115 estimated_peak_memory_range: - min: 1912832 - max: 23677592 + min: 1859584 + max: 23919360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jgdxe9rzp + job_id: jgz3xmo65 job_status: Passed torchscript_onnx: - inference_time: 38727.0 - throughput: 25.821778087639114 + inference_time: 38248.0 + throughput: 26.145157916753817 estimated_peak_memory_range: - min: 57344 - max: 118354280 + min: 32768 + max: 117774056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jgo2k0e1p + job_id: jgo21rndp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ 
models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:16:03Z' + timestamp: '2024-11-09T22:03:30Z' - torchscript_onnx_tflite: - inference_time: 51638.0 - throughput: 19.365583485030402 + inference_time: 40227.0 + throughput: 24.858925597235686 estimated_peak_memory_range: - min: 7196672 - max: 9388912 + min: 5849088 + max: 325725728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jpy1rwdlp + job_id: j5mnwmqqp job_status: Passed torchscript_onnx_qnn: - inference_time: 51782.0 - throughput: 19.31172994476845 + inference_time: 41582.0 + throughput: 24.04886729835025 estimated_peak_memory_range: - min: 1859584 - max: 23895256 + min: 1810432 + max: 69052480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jg9lj13mg + job_id: jg9l3mjvg job_status: Passed torchscript_onnx: - inference_time: 39407.0 - throughput: 25.37620219757911 + inference_time: 29203.0 + throughput: 34.243057220148614 estimated_peak_memory_range: - min: 16384 - max: 117558168 + min: 0 + max: 362809552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jgjvzmk1g + job_id: jgjv0728g job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:16:05Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:03:32Z' - torchscript_onnx_tflite: - inference_time: 40591.0 - throughput: 24.63600305486438 + inference_time: 32548.0 + throughput: 30.72385400024579 estimated_peak_memory_range: - min: 7196672 - max: 323262704 + min: 5586944 + max: 128371648 primary_compute_unit: NPU precision: fp16 layer_info: 
@@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jp8q097op + job_id: jprv408eg job_status: Passed torchscript_onnx_qnn: - inference_time: 42121.0 - throughput: 23.74112675387574 + inference_time: 33111.0 + throughput: 30.201443629005468 estimated_peak_memory_range: - min: 1806336 - max: 66713584 + min: 1781760 + max: 69478560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jgdxe9r6p + job_id: jgdxr3elp job_status: Passed torchscript_onnx: - inference_time: 30767.0 - throughput: 32.50235642084051 + inference_time: 25501.0 + throughput: 39.21414846476609 estimated_peak_memory_range: - min: 8364032 - max: 369986800 + min: 17039360 + max: 228118368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jgz3o9v45 + job_id: jgz3xmj65 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:16:07Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:03:34Z' - torchscript_onnx_tflite: - inference_time: 36434.0 - throughput: 27.446890267332712 + inference_time: 50102.0 + throughput: 19.959283062552394 estimated_peak_memory_range: - min: 6533120 - max: 129017392 + min: 7172096 + max: 9411360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jg9lj138g + job_id: jpy14xr4p job_status: Passed torchscript_onnx_qnn: - inference_time: 29549.0 - throughput: 33.842092795018445 + inference_time: 36279.0 + throughput: 27.564155572094048 estimated_peak_memory_range: - min: 1822720 - max: 69519760 + min: 1929216 + max: 3213184 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: j56yejqnp - job_status: Passed - torchscript_onnx: - inference_time: 25329.0 - throughput: 39.48043744324687 - estimated_peak_memory_range: - min: 2961408 - max: 213851040 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 396 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 396 - job_id: jgdxe976p + job_id: jp4lx1dl5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:16:10Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:03:17Z' - torchscript_onnx_tflite: - inference_time: 50069.0 - throughput: 19.972438035510994 + inference_time: 50440.0 + throughput: 19.825535289452816 estimated_peak_memory_range: - min: 7168000 - max: 9417912 + min: 7180288 + max: 9347856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: j5q6ek2op + job_id: jp8q3x08p job_status: Passed torchscript_onnx_qnn: - inference_time: 36573.0 - throughput: 27.34257512372515 + inference_time: 36944.0 + throughput: 27.067994802944998 estimated_peak_memory_range: - min: 1921024 - max: 3767664 + min: 1933312 + max: 3879936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jp4lkox25 + job_id: jprv402eg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:15:49Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:03:21Z' - torchscript_onnx_tflite: - inference_time: 50585.0 - 
throughput: 19.768706138183255 + inference_time: 51532.0 + throughput: 19.405417992703562 estimated_peak_memory_range: - min: 7188480 - max: 9466640 + min: 7122944 + max: 9512528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jgdxqw76p + job_id: j5q67yemp job_status: Passed torchscript_onnx_qnn: - inference_time: 37057.0 - throughput: 26.985454839841324 + inference_time: 36720.0 + throughput: 27.233115468409586 estimated_peak_memory_range: - min: 1916928 - max: 3220848 + min: 1863680 + max: 3550400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jprv2k1kg + job_id: jpy14xj4p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:10:01Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:03:23Z' - torchscript_onnx_tflite: - inference_time: 50581.0 - throughput: 19.77026946877286 + inference_time: 50666.0 + throughput: 19.737101803971104 estimated_peak_memory_range: - min: 7163904 - max: 9110568 + min: 7204864 + max: 9464016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jp4ldqj25 + job_id: j56y37e7p job_status: Passed torchscript_onnx_qnn: - inference_time: 37009.0 - throughput: 27.02045448404442 + inference_time: 36616.0 + throughput: 27.31046537032991 estimated_peak_memory_range: - min: 1875968 - max: 3164160 + min: 1904640 + max: 3780696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jpy1jev0p + job_id: jp8q3xm8p job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: 
Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:10:03Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:03:25Z' - torchscript_onnx_tflite: - inference_time: 50817.0 - throughput: 19.678454060648995 + inference_time: 65458.0 + throughput: 15.276971493171194 estimated_peak_memory_range: - min: 7192576 - max: 9684976 + min: 7200768 + max: 118061632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: j5mn6rv7p + job_id: jgo21rkdp job_status: Passed torchscript_onnx_qnn: - inference_time: 37128.0 - throughput: 26.933850463262228 + inference_time: 50099.0 + throughput: 19.960478253058945 estimated_peak_memory_range: - min: 3026944 - max: 12769640 + min: 1884160 + max: 7782064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: jp8qmo8qp + job_id: j5q67yrmp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:10:05Z' + chipset: SA8295P + timestamp: '2024-11-09T22:03:26Z' - torchscript_onnx_tflite: - inference_time: 61159.0 - throughput: 16.35082326395134 + inference_time: 61165.0 + throughput: 16.34921932477724 estimated_peak_memory_range: - min: 7163904 - max: 314139744 + min: 7368704 + max: 315248304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 591 - job_id: jgz3o9xx5 + job_id: jgjv07z8g job_status: Passed torchscript_onnx_qnn: - inference_time: 60654.0 - throughput: 16.48695881557688 + inference_time: 60141.0 + throughput: 16.627591825875857 estimated_peak_memory_range: - min: 0 - max: 64992048 + min: 1785856 + max: 70134816 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: j5q6ek9ep + job_id: j56y37z7p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:15:59Z' + timestamp: '2024-11-09T22:03:28Z' - torchscript_onnx_qnn: - inference_time: 33893.0 - throughput: 29.504617472634468 + inference_time: 33869.0 + throughput: 29.525524816203607 estimated_peak_memory_range: min: 1773568 max: 1773568 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 443 - job_id: j5mnq2w7p + job_id: j5mnwm6qp job_status: Passed torchscript_onnx: - inference_time: 36096.0 - throughput: 27.70390070921986 + inference_time: 35826.0 + throughput: 27.91268910846871 estimated_peak_memory_range: - min: 114704384 - max: 114704384 + min: 114413568 + max: 114413568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jg9lj19mg + job_id: jg9l3myvg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,15 +466,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:16:09Z' + timestamp: '2024-11-09T22:03:35Z' - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2590.0 - throughput: 386.1003861003861 + inference_time: 2631.0 + throughput: 380.08361839604714 estimated_peak_memory_range: - min: 16384 - max: 1724176 + min: 20480 + max: 911680256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -495,14 +482,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jp2k06mqp + job_id: jpxk74n95 job_status: Passed torchscript_onnx_qnn: - inference_time: 2965.0 - throughput: 337.2681281618887 + inference_time: 2924.0 + throughput: 341.9972640218878 estimated_peak_memory_range: - min: 4202496 - max: 243436920 + 
min: 16384 + max: 132118400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -510,14 +497,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: j5we2vd45 + job_id: j5wed72j5 job_status: Passed torchscript_onnx: - inference_time: 2846.0 - throughput: 351.37034434293747 + inference_time: 2924.0 + throughput: 341.9972640218878 estimated_peak_memory_range: - min: 659456 - max: 2920672 + min: 20480 + max: 77320944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -525,7 +512,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 395 - job_id: jpv60ozz5 + job_id: jpv61drm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -534,13 +521,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:16:04Z' + timestamp: '2024-11-09T22:03:31Z' - torchscript_onnx_tflite: - inference_time: 2559.0 - throughput: 390.77764751856193 + inference_time: 1848.0 + throughput: 541.1255411255411 estimated_peak_memory_range: min: 12288 - max: 2273688 + max: 198473392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -548,14 +535,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jp0z3qrn5 + job_id: jgn69nlm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3076.0 - throughput: 325.0975292587776 + inference_time: 2103.0 + throughput: 475.51117451260103 estimated_peak_memory_range: - min: 28672 - max: 320929032 + min: 0 + max: 52786400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -563,14 +550,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jp14yldnp + job_id: jp14djylp job_status: Passed torchscript_onnx: - inference_time: 2908.0 - throughput: 343.878954607978 + inference_time: 1991.0 + throughput: 502.26017076845807 estimated_peak_memory_range: - min: 36864 - max: 78142016 + min: 0 + max: 155691968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -578,22 +565,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 395 - job_id: jpede1485 + job_id: jpedrzw05 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:16:06Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:03:32Z' - torchscript_onnx_tflite: - inference_time: 1849.0 - throughput: 540.8328826392644 + inference_time: 1961.0 + throughput: 509.94390617032127 estimated_peak_memory_range: - min: 12288 - max: 198762480 + min: 8192 + max: 27879040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -601,14 +588,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jgke7nyng + job_id: jp2k7w0mp job_status: Passed torchscript_onnx_qnn: - inference_time: 2111.0 - throughput: 473.70914258645195 + inference_time: 1744.0 + throughput: 573.394495412844 estimated_peak_memory_range: min: 0 - max: 53380064 + max: 46959952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -616,14 +603,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: j57y0wjn5 + job_id: j57yj4lr5 job_status: Passed torchscript_onnx: - inference_time: 2156.0 - throughput: 463.821892393321 + inference_time: 1788.0 + throughput: 559.2841163310962 estimated_peak_memory_range: min: 0 - max: 154813824 + max: 36048800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -631,22 +618,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 395 - job_id: j5we2vm45 + job_id: j5wed73j5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:16:07Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:03:34Z' - torchscript_onnx_tflite: - inference_time: 1656.0 - throughput: 
603.864734299517 + inference_time: 2551.0 + throughput: 392.0031360250882 estimated_peak_memory_range: - min: 8192 - max: 28048528 + min: 32768 + max: 1041581584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -654,14 +641,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jp14yld7p + job_id: jp0z1j3e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1747.0 - throughput: 572.4098454493417 + inference_time: 2629.0 + throughput: 380.3727653100038 estimated_peak_memory_range: - min: 0 - max: 47430528 + min: 675840 + max: 2076760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -669,37 +656,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jp3jv3qmg - job_status: Passed - torchscript_onnx: - inference_time: 1799.0 - throughput: 555.864369093941 - estimated_peak_memory_range: - min: 0 - max: 36017760 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 395 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 395 - job_id: j57y0wvn5 + job_id: jpxk74695 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:16:11Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:03:17Z' - torchscript_onnx_tflite: - inference_time: 2587.0 - throughput: 386.5481252415926 + inference_time: 2626.0 + throughput: 380.8073115003808 estimated_peak_memory_range: min: 12288 - max: 2113792 + max: 2445616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -707,14 +679,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jglv6z0m5 + job_id: jgkel47og job_status: Passed torchscript_onnx_qnn: - inference_time: 2616.0 - throughput: 382.262996941896 + inference_time: 2671.0 + throughput: 374.3916136278547 estimated_peak_memory_range: - min: 1277952 - max: 3331136 + min: 
1921024 + max: 4076160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -722,22 +694,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jpxknj785 + job_id: jp2k7w9mp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:15:50Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:03:21Z' - torchscript_onnx_tflite: - inference_time: 2620.0 - throughput: 381.6793893129771 + inference_time: 2585.0 + throughput: 386.84719535783364 estimated_peak_memory_range: min: 12288 - max: 1829720 + max: 1960952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -745,14 +717,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: j57ylzvn5 + job_id: jglv0x6l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2728.0 - throughput: 366.5689149560117 + inference_time: 2676.0 + throughput: 373.69207772795215 estimated_peak_memory_range: - min: 16384 - max: 1562872 + min: 1314816 + max: 2693536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -760,22 +732,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jp2k9836p + job_id: jp0z1j2e5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:10:02Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:03:23Z' - torchscript_onnx_tflite: - inference_time: 2666.0 - throughput: 375.0937734433608 + inference_time: 2625.0 + throughput: 380.95238095238096 estimated_peak_memory_range: - min: 20480 - max: 1812608 + min: 12288 + max: 2618152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -783,14 +755,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jpxk6ve85 + job_id: jp3j49vzg 
job_status: Passed torchscript_onnx_qnn: - inference_time: 2741.0 - throughput: 364.8303538854433 + inference_time: 2661.0 + throughput: 375.7985719654265 estimated_peak_memory_range: - min: 16384 - max: 1527008 + min: 20480 + max: 2071512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -798,22 +770,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jp0z2yk05 + job_id: jgkel4qog job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:10:04Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:03:25Z' - torchscript_onnx_tflite: - inference_time: 2613.0 - throughput: 382.70187523918867 + inference_time: 3318.0 + throughput: 301.38637733574444 estimated_peak_memory_range: min: 12288 - max: 2217552 + max: 26596960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -821,14 +793,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: jgn6m2rj5 + job_id: jpv61d0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2705.0 - throughput: 369.68576709796673 + inference_time: 3953.0 + throughput: 252.97242600556538 estimated_peak_memory_range: - min: 77824 - max: 1569760 + min: 7385088 + max: 12804512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -836,22 +808,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jgkeqzdvg + job_id: jglv0x2l5 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:10:05Z' + chipset: SA8295P + timestamp: '2024-11-09T22:03:27Z' - torchscript_onnx_tflite: - inference_time: 2855.0 - throughput: 350.2626970227671 + inference_time: 2868.0 + throughput: 348.6750348675035 estimated_peak_memory_range: - min: 16384 - max: 197560784 + min: 0 + 
max: 197810720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -859,14 +831,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 399 - job_id: j5we2vdm5 + job_id: jpedrze05 job_status: Passed torchscript_onnx_qnn: - inference_time: 3400.0 - throughput: 294.11764705882354 + inference_time: 3325.0 + throughput: 300.7518796992481 estimated_peak_memory_range: - min: 4399104 - max: 52988688 + min: 5947392 + max: 52962784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -874,7 +846,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jglv6ze25 + job_id: jp3j491zg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -883,13 +855,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:16:00Z' + timestamp: '2024-11-09T22:03:29Z' - torchscript_onnx_qnn: - inference_time: 2793.0 - throughput: 358.03795202291445 + inference_time: 2840.0 + throughput: 352.11267605633805 estimated_peak_memory_range: - min: 7389184 - max: 7389184 + min: 7393280 + max: 7393280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -897,14 +869,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 375 - job_id: jgn6ly9j5 + job_id: jgn69nmm5 job_status: Passed torchscript_onnx: - inference_time: 2912.0 - throughput: 343.4065934065934 + inference_time: 2887.0 + throughput: 346.38032559750604 estimated_peak_memory_range: - min: 70930432 - max: 70930432 + min: 71000064 + max: 71000064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -912,7 +884,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 395 - job_id: jp14ylqnp + job_id: jp14djwlp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -921,4 +893,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:16:09Z' + timestamp: '2024-11-09T22:03:36Z' diff --git a/qai_hub_models/models/unet_segmentation/README.md 
b/qai_hub_models/models/unet_segmentation/README.md index f29f695b..20cd52b1 100644 --- a/qai_hub_models/models/unet_segmentation/README.md +++ b/qai_hub_models/models/unet_segmentation/README.md @@ -5,8 +5,7 @@ UNet is a machine learning model that produces a segmentation mask for an image. The most basic use case will label each pixel in the image as being in the foreground or the background. More advanced usage will assign a class label to each pixel. This version of the model was trained on the data from Kaggle's Carvana Image Masking Challenge (see https://www.kaggle.com/c/carvana-image-masking-challenge) and is used for vehicle segmentation. -This is based on the implementation of Unet-Segmentation found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/unet_segmentation). 
diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 5dd9ee52..d84c7b47 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 150752.0 - throughput: 6.633411165357674 + inference_time: 150193.0 + throughput: 6.658099911447271 estimated_peak_memory_range: - min: 6692864 - max: 463369432 + min: 147456 + max: 550828568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp4lkov15 + job_id: jp4lx1lq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 153801.0 - throughput: 6.501908310089011 + inference_time: 161015.0 + throughput: 6.21060149675496 estimated_peak_memory_range: - min: 9945088 - max: 41122592 + min: 9986048 + max: 41825248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgke7n3ng + job_id: jgkel4vyg job_status: Passed torchscript_onnx: - inference_time: 152695.0 - throughput: 6.549002914306297 + inference_time: 161889.0 + throughput: 6.1770719443569355 estimated_peak_memory_range: - min: 12288 - max: 59465296 + min: 15413248 + max: 17224400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j5we2v9m5 + job_id: j5wed7wz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 2 - timestamp: '2024-10-26T23:14:53Z' + timestamp: '2024-11-09T22:02:21Z' - torchscript_onnx_tflite: - inference_time: 161109.0 - throughput: 6.206977884537797 + inference_time: 111024.0 + throughput: 9.007061536244416 estimated_peak_memory_range: - min: 6705152 - max: 463190736 + min: 5754880 + max: 409362192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jpxknjyl5 + job_id: jpxk74kj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 155694.0 - throughput: 6.422855087543515 + inference_time: 111294.0 + throughput: 8.985210343774147 estimated_peak_memory_range: - min: 9895936 - max: 30984992 + min: 21012480 + max: 108478224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: j5q6ek3op + job_id: j5q67y07p job_status: Passed torchscript_onnx: - inference_time: 155616.0 - throughput: 6.4260744396463085 + inference_time: 113283.0 + throughput: 8.827449838016296 estimated_peak_memory_range: - min: 155648 - max: 58980952 + min: 25784320 + max: 445922176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jg9lj148g + job_id: jg9l3m0qg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:14:54Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:02:22Z' - torchscript_onnx_tflite: - inference_time: 132265.0 - throughput: 7.560579140362152 + inference_time: 103436.0 + throughput: 9.667813913917785 estimated_peak_memory_range: - min: 6791168 - max: 409865968 + min: 5177344 + max: 122492480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 32 - job_id: j5mnq239p + job_id: j5mnwmnyp job_status: Passed torchscript_onnx_qnn: - inference_time: 132878.0 - throughput: 7.52570026640979 + inference_time: 90170.0 + throughput: 11.090163025396473 estimated_peak_memory_range: - min: 9949184 - max: 98457120 + min: 9916416 + max: 115610656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jglv6zkm5 + job_id: jglv0x4e5 job_status: Passed torchscript_onnx: - inference_time: 134065.0 - throughput: 7.459068362361541 + inference_time: 104832.0 + throughput: 9.539072039072039 estimated_peak_memory_range: - min: 782336 - max: 421486560 + min: 19693568 + max: 142513504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp14yl87p + job_id: jp14dj2kp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:14:55Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:02:23Z' - torchscript_onnx_tflite: - inference_time: 102768.0 - throughput: 9.730655456951581 + inference_time: 153377.0 + throughput: 6.519882381321841 estimated_peak_memory_range: - min: 6250496 - max: 123986016 + min: 589824 + max: 472773008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp8q09zop + job_id: jgn69n0v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 102943.0 - throughput: 9.71411363570131 + inference_time: 136975.0 + throughput: 7.300602299689724 estimated_peak_memory_range: - min: 9703424 - max: 115785008 + min: 10092544 + max: 11337024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgz3o97x5 - job_status: Passed - torchscript_onnx: - inference_time: 104609.0 - throughput: 9.55940693439379 - estimated_peak_memory_range: - min: 27054080 - max: 149817200 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 53 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 53 - job_id: j57y0wd95 + job_id: j56y372vp job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:14:57Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T22:02:13Z' - torchscript_onnx_tflite: - inference_time: 147315.0 - throughput: 6.788174999151478 + inference_time: 149315.0 + throughput: 6.697250778555403 estimated_peak_memory_range: - min: 20480 - max: 472360984 + min: 16384 + max: 472418920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jgn6ly3q5 + job_id: jprv406vg job_status: Passed torchscript_onnx_qnn: - inference_time: 138036.0 - throughput: 7.244486945434525 + inference_time: 139944.0 + throughput: 7.145715429028755 estimated_peak_memory_range: - min: 10121216 - max: 11270704 + min: 10153984 + max: 18670256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: j56yej1yp + job_id: jgo21rz4p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:14:45Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:02:15Z' - torchscript_onnx_tflite: - inference_time: 154314.0 - throughput: 6.480293427686406 + 
inference_time: 154145.0 + throughput: 6.487398228940283 estimated_peak_memory_range: min: 6705152 - max: 463185688 + max: 462870296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp3j1kqmg + job_id: jp2k7wxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 145276.0 - throughput: 6.883449434180457 + inference_time: 145992.0 + throughput: 6.849690393994192 estimated_peak_memory_range: - min: 10096640 - max: 11370376 + min: 10051584 + max: 11703232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgjv2xk1g + job_id: jpv61dq75 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:09:20Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:02:16Z' - torchscript_onnx_tflite: - inference_time: 149499.0 - throughput: 6.6890079532304565 + inference_time: 150322.0 + throughput: 6.652386210933862 estimated_peak_memory_range: - min: 667648 - max: 463674496 + min: 6733824 + max: 463312664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jgo2nye1p + job_id: jpy14xzrp job_status: Passed torchscript_onnx_qnn: - inference_time: 143873.0 - throughput: 6.950574464979531 + inference_time: 138474.0 + throughput: 7.221572280716957 estimated_peak_memory_range: - min: 10067968 - max: 11453480 + min: 10031104 + max: 11775936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jpedw9485 + job_id: jgjv07d7g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: 
Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:09:21Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:02:17Z' - torchscript_onnx_tflite: - inference_time: 166806.0 - throughput: 5.994988189873266 + inference_time: 275014.0 + throughput: 3.636178521820707 estimated_peak_memory_range: - min: 6754304 - max: 463169488 + min: 6836224 + max: 125774480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jpv6r3zz5 + job_id: jp0z1j425 job_status: Passed torchscript_onnx_qnn: - inference_time: 147675.0 - throughput: 6.771626883358727 + inference_time: 266041.0 + throughput: 3.758819129382313 estimated_peak_memory_range: - min: 10174464 - max: 11625936 + min: 811008 + max: 6479200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgz3jev45 + job_id: jpedrzo75 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:09:22Z' + chipset: SA8295P + timestamp: '2024-11-09T22:02:19Z' - torchscript_onnx_tflite: - inference_time: 292814.0 - throughput: 3.415137254366253 + inference_time: 361208.0 + throughput: 2.7684879626143384 estimated_peak_memory_range: - min: 6926336 - max: 415522352 + min: 8278016 + max: 413749216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp0z3qln5 + job_id: jp8q3x2zp job_status: Passed torchscript_onnx_qnn: - inference_time: 278494.0 - throughput: 3.590741631776627 + inference_time: 290451.0 + throughput: 3.4429215254896697 estimated_peak_memory_range: - min: 8093696 - max: 98556528 + min: 9895936 + max: 105234128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jpede1vv5 + job_id: jgz3xm2z5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +428,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:14:51Z' + timestamp: '2024-11-09T22:02:20Z' - torchscript_onnx_qnn: - inference_time: 135846.0 - throughput: 7.361276739837757 + inference_time: 135755.0 + throughput: 7.366211189274797 estimated_peak_memory_range: - min: 9850880 - max: 9850880 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jp3jv3mng + job_id: jp3j49nxg job_status: Passed torchscript_onnx: - inference_time: 147350.0 - throughput: 6.78656260604004 + inference_time: 147500.0 + throughput: 6.779661016949152 estimated_peak_memory_range: - min: 56684544 - max: 56684544 + min: 56934400 + max: 56934400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jgdxe9vzp + job_id: jgdxr3nkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:14:56Z' + timestamp: '2024-11-09T22:02:24Z' diff --git a/qai_hub_models/models/vit/README.md b/qai_hub_models/models/vit/README.md index cb79f499..09a84118 100644 --- a/qai_hub_models/models/vit/README.md +++ b/qai_hub_models/models/vit/README.md @@ -5,8 +5,7 @@ VIT is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of VIT found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/vit). diff --git a/qai_hub_models/models/vit/evaluate.py b/qai_hub_models/models/vit/evaluate.py index 91e37600..bf249af0 100644 --- a/qai_hub_models/models/vit/evaluate.py +++ b/qai_hub_models/models/vit/evaluate.py @@ -26,7 +26,6 @@ def main(): default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, supports_qnn=False, - supports_precompiled_ort=False, ) args = parser.parse_args() args.device = None diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index 74e64029..40291af3 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -207,9 +207,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, supports_qnn=False, supports_precompiled_qnn_onnx=False - ) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 3a4e361e..cdf75d82 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 20431.0 - throughput: 48.945230287308505 + inference_time: 20106.0 + throughput: 49.73639709539441 estimated_peak_memory_range: - min: 36864 - max: 2677272 + min: 45056 + max: 2714784 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -53,52 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp14ylw7p + job_id: jg9l3mnqg job_status: Passed torchscript_onnx: - inference_time: 15395.0 - throughput: 64.95615459564793 + inference_time: 15399.0 + throughput: 64.9392817715436 estimated_peak_memory_range: - min: 49152 - max: 203776536 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 976 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 976 - job_id: jgjvzmeeg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:13:51Z' - - torchscript_onnx_tflite: - inference_time: 19857.0 - throughput: 50.36007453291031 - estimated_peak_memory_range: - min: 180224 - max: 3018568 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1579 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1579 - job_id: jgdxe9qzp - job_status: Passed - torchscript_onnx: - inference_time: 15480.0 - throughput: 64.59948320413437 - estimated_peak_memory_range: - min: 53248 - max: 202931648 + min: 69632 + max: 202862232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jpede1kv5 + job_id: jgjv07v7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:13:52Z' + timestamp: '2024-11-09T22:01:16Z' - torchscript_onnx_tflite: - inference_time: 14705.0 - throughput: 68.00408024481469 + inference_time: 14711.0 + throughput: 67.9763442322072 estimated_peak_memory_range: - min: 20480 - max: 400445088 + min: 45056 + max: 402816608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 1579 - job_id: j57y0wl95 + job_id: jp14djzkp job_status: Passed torchscript_onnx: - inference_time: 11346.0 - throughput: 88.13678829543451 + inference_time: 11118.0 + throughput: 89.94423457456377 estimated_peak_memory_range: - min: 0 - max: 156044176 + min: 638976 + max: 156789088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,7 +108,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jgz3o9rx5 + job_id: jpedrzd75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -153,13 +117,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:13:53Z' + timestamp: '2024-11-09T22:01:17Z' - torchscript_onnx_tflite: - inference_time: 13562.0 - throughput: 73.7354372511429 + inference_time: 13710.0 + throughput: 72.93946024799416 estimated_peak_memory_range: - min: 40960 - max: 217413344 + min: 20480 + max: 217816096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -167,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp2k069qp + job_id: jgdxr31kp job_status: Passed torchscript_onnx: - inference_time: 9034.0 - throughput: 110.69293779056896 + inference_time: 10634.0 + throughput: 94.03799134850479 estimated_peak_memory_range: - min: 667648 - max: 117357024 + min: 663552 + max: 117658832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jg9lj1w8g + job_id: jgz3xm3z5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -191,13 +155,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:13:55Z' + timestamp: '2024-11-09T22:01:18Z' - torchscript_onnx_tflite: - inference_time: 19828.0 - throughput: 50.43373007867662 + inference_time: 19818.0 + throughput: 50.45917852457362 estimated_peak_memory_range: - min: 49152 - max: 1793862072 + min: 61440 + max: 
3047456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -205,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp4lkod15 + job_id: j57yj4rq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -214,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:13:34Z' + timestamp: '2024-11-09T22:00:59Z' - torchscript_onnx_tflite: - inference_time: 19796.0 - throughput: 50.515255607193374 + inference_time: 20416.0 + throughput: 48.98119122257053 estimated_peak_memory_range: - min: 176128 - max: 208590744 + min: 49152 + max: 3458896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +192,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jprv2k4kg + job_id: jp4lx1rq5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -237,13 +201,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:08:35Z' + timestamp: '2024-11-09T22:01:00Z' - torchscript_onnx_tflite: - inference_time: 20181.0 - throughput: 49.551558396511574 + inference_time: 20342.0 + throughput: 49.15937469275391 estimated_peak_memory_range: - min: 65536 - max: 2498008 + min: 28672 + max: 3189960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +215,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp2k9876p + job_id: j5mnwmxyp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -260,13 +224,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:08:36Z' + timestamp: '2024-11-09T22:01:01Z' - torchscript_onnx_tflite: - inference_time: 19823.0 - throughput: 50.44645109216567 + inference_time: 19950.0 + throughput: 50.12531328320802 estimated_peak_memory_range: - min: 16384 - max: 2111880 + min: 61440 + max: 2640808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +238,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jpy1je40p + job_id: jgn69n6v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -283,13 +247,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:08:37Z' + timestamp: '2024-11-09T22:01:02Z' + - torchscript_onnx_tflite: + inference_time: 27983.0 + throughput: 35.73598256084051 + estimated_peak_memory_range: + min: 77824 + max: 201364336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1579 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1579 + job_id: jprv40vvg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T22:01:03Z' - torchscript_onnx_tflite: - inference_time: 25192.0 - throughput: 39.69514131470308 + inference_time: 25148.0 + throughput: 39.76459360585335 estimated_peak_memory_range: - min: 118784 - max: 385039712 + min: 20480 + max: 382762208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +284,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jprv8q27g + job_id: jp2k7wkxp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +293,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:13:38Z' + timestamp: '2024-11-09T22:01:04Z' - torchscript_onnx: - inference_time: 21699.0 - throughput: 46.085073044840776 + inference_time: 21682.0 + throughput: 46.12120653076285 estimated_peak_memory_range: - min: 179032064 - max: 179032064 + min: 179089408 + max: 179089408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +307,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: j5we2vqm5 + job_id: j5wed7ez5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +316,4 @@ models: os_name: Windows 
manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:13:54Z' + timestamp: '2024-11-09T22:01:19Z' diff --git a/qai_hub_models/models/vit_quantized/README.md b/qai_hub_models/models/vit_quantized/README.md index a4560b1a..90fcc7f6 100644 --- a/qai_hub_models/models/vit_quantized/README.md +++ b/qai_hub_models/models/vit_quantized/README.md @@ -5,8 +5,7 @@ VIT is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of VITQuantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/vit_quantized). diff --git a/qai_hub_models/models/vit_quantized/perf.yaml b/qai_hub_models/models/vit_quantized/perf.yaml index 7320f66a..1fe82a6f 100644 --- a/qai_hub_models/models/vit_quantized/perf.yaml +++ b/qai_hub_models/models/vit_quantized/perf.yaml @@ -46,49 +46,11 @@ models: - name: VITQuantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 5436.0 - throughput: 183.95879323031642 - estimated_peak_memory_range: - min: 12288 - max: 31436272 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 903 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 903 - job_id: jg9ly0mmg - job_status: Passed - torchscript_onnx: - inference_time: 43360.0 - throughput: 23.062730627306273 - estimated_peak_memory_range: - min: 65536 - max: 269665184 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 1654 - layers_on_gpu: 0 - layers_on_cpu: 25 - total_layers: 1679 - job_id: jp8qm2eqp - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - 
os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:38:29Z' - - torchscript_onnx_qnn: - inference_time: 5447.0 - throughput: 183.58729575913347 + inference_time: 5485.0 + throughput: 182.31540565177758 estimated_peak_memory_range: min: 16384 - max: 39424024 + max: 31082992 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,22 +58,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jp14w2jnp + job_id: jg9l3q4qg job_status: Passed torchscript_onnx: - inference_time: 43415.0 - throughput: 23.033513762524475 + inference_time: 48235.0 + throughput: 20.73183373069348 estimated_peak_memory_range: - min: 81920 - max: 3129480 + min: 5058560 + max: 7523152 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 1654 + layers_on_npu: 1657 layers_on_gpu: 0 - layers_on_cpu: 25 - total_layers: 1679 - job_id: jgkeqv2vg + layers_on_cpu: 26 + total_layers: 1683 + job_id: jgn69w9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -120,13 +82,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:38:31Z' + timestamp: '2024-11-09T23:27:43Z' - torchscript_onnx_qnn: - inference_time: 3604.0 - throughput: 277.4694783573807 + inference_time: 3643.0 + throughput: 274.49903925336264 estimated_peak_memory_range: min: 163840 - max: 58570224 + max: 57646576 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,22 +96,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgdxqn36p + job_id: jp14dm8kp job_status: Passed torchscript_onnx: - inference_time: 41180.0 - throughput: 24.28363283147159 + inference_time: 35974.0 + throughput: 27.79785400567076 estimated_peak_memory_range: - min: 188416 - max: 800330656 + min: 3809280 + max: 808186736 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 1654 + layers_on_npu: 1657 layers_on_gpu: 0 - layers_on_cpu: 25 - total_layers: 1679 - 
job_id: j5q6r0lep + layers_on_cpu: 26 + total_layers: 1683 + job_id: jprv474eg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +120,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:38:33Z' + timestamp: '2024-11-09T23:27:44Z' - torchscript_onnx_qnn: - inference_time: 3405.0 - throughput: 293.68575624082234 + inference_time: 3428.0 + throughput: 291.71528588098016 estimated_peak_memory_range: - min: 159744 - max: 76309984 + min: 0 + max: 75993248 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,22 +134,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jpy1jzx0p + job_id: jgdxrmvkp job_status: Passed torchscript_onnx: - inference_time: 33509.0 - throughput: 29.84272881912322 + inference_time: 30672.0 + throughput: 32.60302556077204 estimated_peak_memory_range: - min: 282624 - max: 338616032 + min: 2617344 + max: 349967824 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 1654 + layers_on_npu: 1657 layers_on_gpu: 0 - layers_on_cpu: 25 - total_layers: 1679 - job_id: j56yz28np + layers_on_cpu: 26 + total_layers: 1683 + job_id: jp2k7z7mp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -196,13 +158,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:38:37Z' + timestamp: '2024-11-09T23:27:46Z' - torchscript_onnx_qnn: - inference_time: 22149.0 - throughput: 45.14876518127229 + inference_time: 22555.0 + throughput: 44.336067390822436 estimated_peak_memory_range: min: 217088 - max: 8125392 + max: 8360304 primary_compute_unit: NPU precision: int8 layer_info: @@ -210,7 +172,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 902 - job_id: j57yl24n5 + job_id: j5wedr9j5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -219,13 +181,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: 
'2024-10-27T00:38:14Z' + timestamp: '2024-11-09T23:27:28Z' - torchscript_onnx_qnn: - inference_time: 4924.0 - throughput: 203.08692120227457 + inference_time: 4961.0 + throughput: 201.57226365652087 estimated_peak_memory_range: - min: 180224 - max: 1365032 + min: 184320 + max: 1713552 primary_compute_unit: NPU precision: int8 layer_info: @@ -233,7 +195,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jp4ldn125 + job_id: jg9l3q4vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -242,13 +204,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:38:15Z' + timestamp: '2024-11-09T23:27:30Z' - torchscript_onnx_qnn: - inference_time: 4935.0 - throughput: 202.63424518743668 + inference_time: 4972.0 + throughput: 201.1263073209976 estimated_peak_memory_range: - min: 221184 - max: 1724824 + min: 184320 + max: 1393872 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,7 +218,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jpedw1q15 + job_id: jgdxrmvlp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -265,13 +227,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:01:51Z' + timestamp: '2024-11-09T23:27:34Z' - torchscript_onnx_qnn: - inference_time: 4952.0 - throughput: 201.93861066235866 + inference_time: 4975.0 + throughput: 201.00502512562815 estimated_peak_memory_range: - min: 86016 - max: 1862792 + min: 221184 + max: 1568392 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,7 +241,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgz3j96k5 + job_id: j57yj8jr5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -288,13 +250,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:01:53Z' + timestamp: '2024-11-09T23:27:36Z' - torchscript_onnx_qnn: - inference_time: 4962.0 - 
throughput: 201.53164046755342 + inference_time: 4960.0 + throughput: 201.61290322580646 estimated_peak_memory_range: - min: 217088 - max: 1416008 + min: 176128 + max: 1801176 primary_compute_unit: NPU precision: int8 layer_info: @@ -302,7 +264,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jg9ly16lg + job_id: jp4lx2xl5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -311,13 +273,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:01:56Z' + timestamp: '2024-11-09T23:27:37Z' - torchscript_onnx_qnn: - inference_time: 7130.0 - throughput: 140.25245441795232 + inference_time: 6969.0 + throughput: 143.49261013057827 estimated_peak_memory_range: - min: 163840 - max: 5879552 + min: 221184 + max: 5884720 primary_compute_unit: NPU precision: int8 layer_info: @@ -325,7 +287,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: j5we3vj65 + job_id: jpxk7z795 job_status: Passed reference_device_info: name: SA8295P ADP @@ -334,13 +296,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:01:55Z' + timestamp: '2024-11-09T23:27:39Z' - torchscript_onnx_qnn: - inference_time: 6515.0 - throughput: 153.49194167306217 + inference_time: 6261.0 + throughput: 159.71889474524835 estimated_peak_memory_range: - min: 163840 - max: 61632240 + min: 159744 + max: 60464608 primary_compute_unit: NPU precision: int8 layer_info: @@ -348,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jp2k9xw6p + job_id: j5mnwlwqp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -357,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:38:24Z' + timestamp: '2024-11-09T23:27:41Z' - torchscript_onnx_qnn: - inference_time: 5396.0 - throughput: 185.32246108228318 + inference_time: 5363.0 + throughput: 186.46280067126608 estimated_peak_memory_range: 
- min: 217088 - max: 217088 + min: 172032 + max: 172032 primary_compute_unit: NPU precision: int8 layer_info: @@ -371,22 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jpxk69485 + job_id: jp14dm8lp job_status: Passed torchscript_onnx: - inference_time: 58817.0 - throughput: 17.001887209480252 + inference_time: 60097.0 + throughput: 16.639765712098775 estimated_peak_memory_range: - min: 239296512 - max: 239296512 + min: 236589056 + max: 236589056 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 1654 + layers_on_npu: 1657 layers_on_gpu: 0 - layers_on_cpu: 25 - total_layers: 1679 - job_id: jglv24y25 + layers_on_cpu: 26 + total_layers: 1683 + job_id: jpy14y44p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -395,4 +357,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:38:34Z' + timestamp: '2024-11-09T23:27:48Z' diff --git a/qai_hub_models/models/whisper_base_en/README.md b/qai_hub_models/models/whisper_base_en/README.md index e92925a2..23bd5f06 100644 --- a/qai_hub_models/models/whisper_base_en/README.md +++ b/qai_hub_models/models/whisper_base_en/README.md @@ -5,8 +5,7 @@ OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. -This is based on the implementation of Whisper-Base-En found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_base_en). diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml index 31879635..fb15f54a 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 214470.0 - throughput: 4.662656781834289 + inference_time: 203933.0 + throughput: 4.903571270956638 estimated_peak_memory_range: - min: 31571968 - max: 113910112 + min: 23183360 + max: 97878392 primary_compute_unit: GPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: jpede1oo5 + job_id: jgjv0qm1g job_status: Passed torchscript_onnx_qnn: - inference_time: 282051.0 - throughput: 3.5454580909126365 + inference_time: 305337.0 + throughput: 3.2750698408643566 estimated_peak_memory_range: - min: 618496 - max: 74446856 + min: 40960 + max: 72512904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jglv6z4j5 + job_id: jpy14m0rp job_status: Passed torchscript_onnx: - inference_time: 307977.0 - throughput: 3.246995717212649 + inference_time: 306182.0 + throughput: 3.2660313147082456 estimated_peak_memory_range: - min: 12939264 - max: 148851288 + min: 12673024 + max: 148372608 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jgn6lylq5 + job_id: j5mnw8yyp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:12:47Z' + timestamp: '2024-11-09T22:00:13Z' - torchscript_onnx_tflite: - inference_time: 201581.0 - throughput: 4.960784994617549 + inference_time: 157661.0 + throughput: 6.342722677136387 estimated_peak_memory_range: - min: 14741504 - max: 93156696 + min: 38940672 + max: 80752304 primary_compute_unit: GPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: j5we2vw35 + job_id: jgz3xn945 job_status: Passed torchscript_onnx_qnn: - inference_time: 304267.0 - throughput: 3.28658710934804 + inference_time: 230887.0 + throughput: 4.331123016887049 estimated_peak_memory_range: - min: 638976 - max: 69609000 + min: 0 + max: 301063088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jp3jv3n3g + job_id: jp8q31vzp job_status: Passed torchscript_onnx: - inference_time: 317303.0 - throughput: 3.151561756428398 + inference_time: 221376.0 + throughput: 4.51720150332466 estimated_peak_memory_range: - min: 37015552 - max: 172544568 + min: 99360768 + max: 1129345776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jp2k060qp + job_id: jprv403vg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:12:49Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:00:15Z' - 
torchscript_onnx_tflite: - inference_time: 165308.0 - throughput: 6.049314007791517 + inference_time: 169904.0 + throughput: 5.885676617383934 estimated_peak_memory_range: - min: 40554496 - max: 80601136 + min: 38920192 + max: 59434960 primary_compute_unit: GPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: jp14yl28p + job_id: jg9l3d1mg job_status: Passed torchscript_onnx_qnn: - inference_time: 242116.0 - throughput: 4.130251614928381 + inference_time: 198914.0 + throughput: 5.027298229385564 estimated_peak_memory_range: min: 0 - max: 303473056 + max: 321617008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jpv60o0k5 + job_id: j5q67vo7p job_status: Passed torchscript_onnx: - inference_time: 225485.0 - throughput: 4.434884803867219 + inference_time: 194646.0 + throughput: 5.137531724258397 estimated_peak_memory_range: - min: 0 - max: 1027759776 + min: 78934016 + max: 743565104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jp0z3q3n5 + job_id: jpy14x3rp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:12:51Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:00:17Z' - torchscript_onnx_tflite: - inference_time: 142711.0 - throughput: 7.007168333204868 + inference_time: 204676.0 + throughput: 4.885770681467295 estimated_peak_memory_range: - min: 45907968 - max: 66484176 + min: 12288 + max: 82573440 primary_compute_unit: GPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: jgke7nvwg + job_id: jgdxr296p 
job_status: Passed torchscript_onnx_qnn: - inference_time: 175170.0 - throughput: 5.708740081064109 + inference_time: 220923.0 + throughput: 4.526463971610018 estimated_peak_memory_range: - min: 307200 - max: 322467648 + min: 229376 + max: 11296256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jpxknjnl5 - job_status: Passed - torchscript_onnx: - inference_time: 182521.0 - throughput: 5.4788216150470355 - estimated_peak_memory_range: - min: 18259968 - max: 682583760 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 380 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 380 - job_id: jglv6z2m5 + job_id: j56y3wlvp job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:12:55Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:59:59Z' - torchscript_onnx_tflite: - inference_time: 198396.0 - throughput: 5.040424202100849 + inference_time: 204043.0 + throughput: 4.900927745622246 estimated_peak_memory_range: - min: 20951040 - max: 103895560 + min: 16793600 + max: 95840912 primary_compute_unit: GPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: j57y0w2v5 + job_id: jg9l3d1qg job_status: Passed torchscript_onnx_qnn: - inference_time: 225029.0 - throughput: 4.443871678761404 + inference_time: 197470.0 + throughput: 5.064060363599534 estimated_peak_memory_range: - min: 290816 - max: 11462576 + min: 143360 + max: 12116600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jpede1eo5 + job_id: jgjv0q47g job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - 
form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:12:33Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:00:03Z' - torchscript_onnx_tflite: - inference_time: 205756.0 - throughput: 4.860125585645133 + inference_time: 196451.0 + throughput: 5.090327868017979 estimated_peak_memory_range: - min: 12333056 - max: 91320816 + min: 39968768 + max: 102636984 primary_compute_unit: GPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: j5q6r87ep + job_id: jgdxr29kp job_status: Passed torchscript_onnx_qnn: - inference_time: 230126.0 - throughput: 4.345445538531066 + inference_time: 222117.0 + throughput: 4.50213175938807 estimated_peak_memory_range: - min: 303104 - max: 11331328 + min: 262144 + max: 11278024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jgjv2x01g + job_id: jgz3xnkz5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:07:58Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T22:00:05Z' - torchscript_onnx_tflite: - inference_time: 199965.0 - throughput: 5.000875153151801 + inference_time: 207860.0 + throughput: 4.810930433945925 estimated_peak_memory_range: - min: 4100096 - max: 87419416 + min: 23277568 + max: 105995584 primary_compute_unit: GPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: j56yz63np + job_id: jp4lx3oq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 229665.0 - throughput: 4.354168027344175 + inference_time: 229820.0 + throughput: 4.351231398485772 estimated_peak_memory_range: - min: 737280 - max: 1828080 + min: 667648 
+ max: 1878944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jgz3jex45 + job_id: jg9l3deqg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:08:00Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:00:07Z' - torchscript_onnx_tflite: - inference_time: 203025.0 - throughput: 4.925501785494397 + inference_time: 206208.0 + throughput: 4.8494723774053385 estimated_peak_memory_range: - min: 36864 - max: 86390624 + min: 37576704 + max: 62770032 primary_compute_unit: GPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: jgo2ny11p + job_id: j5mnw82yp job_status: Passed torchscript_onnx_qnn: - inference_time: 231518.0 - throughput: 4.319318584300141 + inference_time: 224060.0 + throughput: 4.463090243684728 estimated_peak_memory_range: - min: 163840 - max: 10590368 + min: 671744 + max: 6516304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: jg9lyv3mg + job_id: jgdxr2lkp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:08:02Z' + chipset: SA8295P + timestamp: '2024-11-09T22:00:09Z' - torchscript_onnx_tflite: - inference_time: 333089.0 - throughput: 3.0022006130493653 + inference_time: 267331.0 + throughput: 3.7406810283880283 estimated_peak_memory_range: - min: 15073280 - max: 60792192 + min: 15638528 + max: 64220688 primary_compute_unit: GPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 408 layers_on_cpu: 11 total_layers: 419 - job_id: jp0z3q495 + job_id: 
jprv4wjvg job_status: Passed torchscript_onnx_qnn: - inference_time: 337527.0 - throughput: 2.962725944887372 + inference_time: 326290.0 + throughput: 3.064758343804591 estimated_peak_memory_range: min: 0 - max: 313627056 + max: 310779008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: j57y0w095 + job_id: jp4lx30q5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:12:43Z' + timestamp: '2024-11-09T22:00:11Z' - torchscript_onnx_qnn: - inference_time: 179680.0 - throughput: 5.565449688334818 + inference_time: 179110.0 + throughput: 5.583161185863436 estimated_peak_memory_range: min: 483328 max: 483328 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 531 - job_id: j5we2v235 + job_id: jgo218q4p job_status: Passed torchscript_onnx: - inference_time: 308138.0 - throughput: 3.245299184131785 + inference_time: 308125.0 + throughput: 3.2454361054766734 estimated_peak_memory_range: - min: 139685888 - max: 139685888 + min: 139706368 + max: 139706368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jgke7n7ng + job_id: jp8q3xyzp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,15 +466,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:12:53Z' + timestamp: '2024-11-09T22:00:18Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 14373.0 - throughput: 69.57489737702637 + inference_time: 14300.0 + throughput: 69.93006993006993 estimated_peak_memory_range: - min: 3489792 - max: 6187024 + min: 5754880 + max: 7972488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -495,14 +482,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jgz3o92o5 + job_id: jpedry185 job_status: Passed torchscript_onnx_qnn: - inference_time: 4191.0 - throughput: 238.60653781913624 + inference_time: 4046.0 + throughput: 247.1576866040534 estimated_peak_memory_range: - min: 4448256 - max: 117330336 + min: 8929280 + max: 207146416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -510,14 +497,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j56yej26p + job_id: jp0z16725 job_status: Passed torchscript_onnx: - inference_time: 32260.0 - throughput: 30.998140111593305 + inference_time: 34336.0 + throughput: 29.12395153774464 estimated_peak_memory_range: - min: 81920 - max: 121862888 + min: 65536 + max: 122127904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -525,7 +512,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jprv8q87g + job_id: jgn69nvv5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -534,13 +521,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:12:47Z' + timestamp: '2024-11-09T22:00:14Z' - torchscript_onnx_tflite: - inference_time: 14516.0 - throughput: 68.88950124001103 + inference_time: 11516.0 + throughput: 86.83570684265369 estimated_peak_memory_range: - min: 5763072 - max: 8446584 + min: 4902912 + max: 103781216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -548,14 +535,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jg9lj10wg + job_id: j5wed4v45 job_status: Passed torchscript_onnx_qnn: - inference_time: 4045.0 - throughput: 247.21878862793574 + inference_time: 3045.0 + throughput: 328.4072249589491 estimated_peak_memory_range: - min: 21233664 - max: 33060888 + min: 21213184 + max: 63480704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -563,14 +550,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jgo2k0zqp + job_id: 
jgkel8myg job_status: Passed torchscript_onnx: - inference_time: 31939.0 - throughput: 31.30968408528758 + inference_time: 13486.0 + throughput: 74.15097137772504 estimated_peak_memory_range: - min: 81920 - max: 122326552 + min: 41914368 + max: 460493760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -578,22 +565,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jpy1rwrlp + job_id: jp2k7wyxp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:12:50Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T22:00:15Z' - torchscript_onnx_tflite: - inference_time: 11755.0 - throughput: 85.07018290089324 + inference_time: 9886.0 + throughput: 101.15314586283634 estimated_peak_memory_range: - min: 5754880 - max: 103780112 + min: 3727360 + max: 55402448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -601,14 +588,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jgdxe9nrp + job_id: jp14d6lnp job_status: Passed torchscript_onnx_qnn: - inference_time: 3107.0 - throughput: 321.853878339234 + inference_time: 2743.0 + throughput: 364.5643456069996 estimated_peak_memory_range: - min: 21233664 - max: 60289424 + min: 21209088 + max: 57697968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -616,14 +603,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jgjvzmzvg + job_id: jglv0lre5 job_status: Passed torchscript_onnx: - inference_time: 15168.0 - throughput: 65.92827004219409 + inference_time: 12014.0 + throughput: 83.23622440486099 estimated_peak_memory_range: - min: 45375488 - max: 463220000 + min: 31739904 + max: 292579552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -631,22 +618,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jp8q090op + job_id: jp0z1j025 
job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:12:52Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T22:00:17Z' - torchscript_onnx_tflite: - inference_time: 9891.0 - throughput: 101.1020119300374 + inference_time: 14182.0 + throughput: 70.51191651389085 estimated_peak_memory_range: - min: 28672 - max: 50675728 + min: 5754880 + max: 7909952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -654,14 +641,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: j5q6ek0np + job_id: j5wed4vz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2561.0 - throughput: 390.4724716907458 + inference_time: 4129.0 + throughput: 242.18939210462582 estimated_peak_memory_range: - min: 21229568 - max: 57882016 + min: 21303296 + max: 22372480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -669,37 +656,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j5mnq2q9p - job_status: Passed - torchscript_onnx: - inference_time: 11336.0 - throughput: 88.21453775582216 - estimated_peak_memory_range: - min: 31035392 - max: 292192576 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 844 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 844 - job_id: j56yejzyp + job_id: jp3j462xg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:12:55Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:59:59Z' - torchscript_onnx_tflite: - inference_time: 14131.0 - throughput: 70.76640011322624 + inference_time: 14303.0 + throughput: 69.9154023631406 estimated_peak_memory_range: - min: 
5775360 - max: 7451792 + min: 5787648 + max: 8036104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -707,14 +679,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jp4lkon85 + job_id: jp14d6lkp job_status: Passed torchscript_onnx_qnn: - inference_time: 4111.0 - throughput: 243.24981756263682 + inference_time: 4153.0 + throughput: 240.78979051288226 estimated_peak_memory_range: - min: 19906560 - max: 21169840 + min: 21295104 + max: 26163568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -722,22 +694,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jgz3o9oo5 + job_id: jpedry375 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:12:33Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T22:00:04Z' - torchscript_onnx_tflite: - inference_time: 14438.0 - throughput: 69.26167059149466 + inference_time: 14602.0 + throughput: 68.48376934666484 estimated_peak_memory_range: min: 5746688 - max: 8089056 + max: 7825800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -745,14 +717,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jglv2n025 + job_id: j57yj9wq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4180.0 - throughput: 239.23444976076556 + inference_time: 4152.0 + throughput: 240.84778420038535 estimated_peak_memory_range: - min: 19873792 - max: 21185552 + min: 21315584 + max: 22842784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -760,22 +732,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jpedw9r85 + job_id: j5wed4nz5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:07:58Z' 
+ chipset: SA8775P Proxy + timestamp: '2024-11-09T22:00:06Z' - torchscript_onnx_tflite: - inference_time: 14270.0 - throughput: 70.0770847932726 + inference_time: 14508.0 + throughput: 68.92748828232699 estimated_peak_memory_range: min: 5754880 - max: 7732784 + max: 8174352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -783,14 +755,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jp3j1k4mg + job_id: jpxk7xjj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4081.0 - throughput: 245.0379808870375 + inference_time: 4092.0 + throughput: 244.37927663734115 estimated_peak_memory_range: - min: 21254144 - max: 22506408 + min: 23216128 + max: 24521744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -798,22 +770,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j5we3od45 + job_id: jp14d6xkp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:08:00Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T22:00:08Z' - torchscript_onnx_tflite: - inference_time: 14279.0 - throughput: 70.03291547027102 + inference_time: 15945.0 + throughput: 62.715584822828475 estimated_peak_memory_range: - min: 5750784 - max: 8454776 + min: 5771264 + max: 49763744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -821,14 +793,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jpv6r31z5 + job_id: jgn69k8v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4212.0 - throughput: 237.41690408357076 + inference_time: 5501.0 + throughput: 181.78512997636793 estimated_peak_memory_range: - min: 21315584 - max: 22715240 + min: 18452480 + max: 24256560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -836,22 +808,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jp14w0dnp + job_id: j57yj93q5 
job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:08:02Z' + chipset: SA8295P + timestamp: '2024-11-09T22:00:10Z' - torchscript_onnx_tflite: - inference_time: 16217.0 - throughput: 61.663686255164336 + inference_time: 16210.0 + throughput: 61.69031462060457 estimated_peak_memory_range: - min: 5812224 - max: 96326336 + min: 5775360 + max: 98003280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -859,14 +831,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jp8q092kp + job_id: jp2k7enxp job_status: Passed torchscript_onnx_qnn: - inference_time: 4819.0 - throughput: 207.5119319360863 + inference_time: 4862.0 + throughput: 205.67667626491155 estimated_peak_memory_range: - min: 3768320 - max: 40691952 + min: 21217280 + max: 60705488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -874,7 +846,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jp4lkok15 + job_id: jpxk7x2j5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -883,10 +855,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:12:44Z' + timestamp: '2024-11-09T22:00:12Z' - torchscript_onnx_qnn: - inference_time: 3704.0 - throughput: 269.97840172786175 + inference_time: 3762.0 + throughput: 265.8160552897395 estimated_peak_memory_range: min: 21229568 max: 21229568 @@ -897,14 +869,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jg9lj1jwg + job_id: jpv617x75 job_status: Passed torchscript_onnx: - inference_time: 14403.0 - throughput: 69.42997986530584 + inference_time: 14274.0 + throughput: 70.05744710662744 estimated_peak_memory_range: - min: 112254976 - max: 112254976 + min: 112214016 + max: 112214016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -912,7 +884,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j5q6ekeop + job_id: jgkel4xyg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -921,4 +893,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:12:53Z' + timestamp: '2024-11-09T22:00:19Z' diff --git a/qai_hub_models/models/whisper_small_en/README.md b/qai_hub_models/models/whisper_small_en/README.md index cb644d65..9224b34a 100644 --- a/qai_hub_models/models/whisper_small_en/README.md +++ b/qai_hub_models/models/whisper_small_en/README.md @@ -5,8 +5,7 @@ OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. -This is based on the implementation of Whisper-Small-En found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_small_en). 
diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index 37b0509c..72d02ff4 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 702282.0 - throughput: 1.4239294186665756 + inference_time: 704009.0 + throughput: 1.4204363864666503 estimated_peak_memory_range: - min: 1060864 - max: 300454880 + min: 47362048 + max: 460023176 primary_compute_unit: GPU precision: fp16 layer_info: @@ -53,52 +55,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jgke7zmwg + job_id: jp0z16n05 job_status: Passed torchscript_onnx_qnn: - inference_time: 859358.0 - throughput: 1.1636593829347024 + inference_time: 866620.0 + throughput: 1.1539082873693198 estimated_peak_memory_range: - min: 0 - max: 220397704 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jgn6lyvk5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:11:06Z' - - torchscript_onnx_tflite: - inference_time: 695114.0 - throughput: 1.4386129469410773 - estimated_peak_memory_range: - min: 63426560 - max: 476415504 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: jglv6nrj5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 870805.0 - throughput: 
1.1483627218493233 - estimated_peak_memory_range: - min: 57344 - max: 219497152 + min: 2539520 + max: 240981176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jp2k06yrp + job_id: jpxk7xw85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -115,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:11:08Z' + timestamp: '2024-11-09T21:58:33Z' - torchscript_onnx_tflite: - inference_time: 547277.0 - throughput: 1.827228259181365 + inference_time: 514369.0 + throughput: 1.9441296034558848 estimated_peak_memory_range: - min: 116436992 - max: 207248224 + min: 113270784 + max: 205239136 primary_compute_unit: GPU precision: fp16 layer_info: @@ -129,14 +93,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jp3jvk23g + job_id: jgkel8jvg job_status: Passed torchscript_onnx_qnn: - inference_time: 703838.0 - throughput: 1.4207814866489163 + inference_time: 615589.0 + throughput: 1.624460476064387 estimated_peak_memory_range: min: 0 - max: 876882656 + max: 879898416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jp0z3q095 + job_id: jgn69kjj5 job_status: Passed torchscript_onnx: - inference_time: 835138.0 - throughput: 1.1974068956268304 + inference_time: 808662.0 + throughput: 1.2366105987421196 estimated_peak_memory_range: - min: 143159296 - max: 4539018768 + min: 152461312 + max: 4546550480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,7 +123,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jgn6ly6k5 + job_id: j57yj9zn5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -168,13 +132,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:11:10Z' + timestamp: 
'2024-11-09T21:58:35Z' - torchscript_onnx_tflite: - inference_time: 544073.0 - throughput: 1.8379886522580609 + inference_time: 537047.0 + throughput: 1.862034421568317 estimated_peak_memory_range: - min: 116363264 - max: 145532224 + min: 116350976 + max: 145267744 primary_compute_unit: GPU precision: fp16 layer_info: @@ -182,14 +146,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jpxknjo35 + job_id: jglv0lj25 job_status: Passed torchscript_onnx_qnn: - inference_time: 550657.0 - throughput: 1.8160125086941599 + inference_time: 494644.0 + throughput: 2.021655978845392 estimated_peak_memory_range: min: 0 - max: 953455456 + max: 952542336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jp14yl48p + job_id: jp2k7e26p job_status: Passed torchscript_onnx: - inference_time: 701331.0 - throughput: 1.4258602571396388 + inference_time: 698505.0 + throughput: 1.4316289790337935 estimated_peak_memory_range: - min: 84287488 - max: 2868743808 + min: 122765312 + max: 2907657184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jp0z3qz95 + job_id: jpxk7xv85 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -221,13 +185,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:11:14Z' + timestamp: '2024-11-09T21:58:37Z' - torchscript_onnx_tflite: - inference_time: 697675.0 - throughput: 1.4333321388898843 + inference_time: 686962.0 + throughput: 1.4556845939076688 estimated_peak_memory_range: - min: 58421248 - max: 474956920 + min: 41578496 + max: 460926792 primary_compute_unit: GPU precision: fp16 layer_info: @@ -235,14 +199,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jpv60okk5 + job_id: jp3j46ymg job_status: Passed torchscript_onnx_qnn: - 
inference_time: 669230.0 - throughput: 1.4942545910972311 + inference_time: 693295.0 + throughput: 1.4423874396901752 estimated_peak_memory_range: - min: 983040 - max: 2315136 + min: 991232 + max: 2320672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -250,7 +214,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jgke7nxwg + job_id: jp0z16y05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -259,13 +223,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:10:51Z' + timestamp: '2024-11-09T21:58:18Z' - torchscript_onnx_tflite: - inference_time: 706138.0 - throughput: 1.4161537829716004 + inference_time: 711433.0 + throughput: 1.4056137401554327 estimated_peak_memory_range: - min: 59506688 - max: 470636296 + min: 18477056 + max: 372052424 primary_compute_unit: GPU precision: fp16 layer_info: @@ -273,14 +237,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jgjv2xl1g + job_id: jpv617jz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 701537.0 - throughput: 1.4254415661611575 + inference_time: 701700.0 + throughput: 1.4251104460595696 estimated_peak_memory_range: - min: 978944 - max: 2189840 + min: 954368 + max: 2314728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jgdxqwv6p + job_id: jglv0ln25 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -297,13 +261,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:06:52Z' + timestamp: '2024-11-09T21:58:22Z' - torchscript_onnx_tflite: - inference_time: 709950.0 - throughput: 1.4085498978801323 + inference_time: 703544.0 + throughput: 1.4213752089421556 estimated_peak_memory_range: - min: 52367360 - max: 432018296 + min: 63836160 + max: 478995416 primary_compute_unit: GPU precision: fp16 layer_info: @@ -311,14 +275,14 @@ 
models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jgz3je745 + job_id: jpedryj85 job_status: Passed torchscript_onnx_qnn: - inference_time: 696426.0 - throughput: 1.4359027376921596 + inference_time: 713618.0 + throughput: 1.4013099445361523 estimated_peak_memory_range: - min: 1257472 - max: 2730512 + min: 53248 + max: 59287184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -326,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jp4ldqw25 + job_id: jp3j46kmg job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -335,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:06:55Z' + timestamp: '2024-11-09T21:58:25Z' - torchscript_onnx_tflite: - inference_time: 675859.0 - throughput: 1.4795985553199706 + inference_time: 712376.0 + throughput: 1.4037530742192326 estimated_peak_memory_range: - min: 18444288 - max: 419772032 + min: 34390016 + max: 441873776 primary_compute_unit: GPU precision: fp16 layer_info: @@ -349,14 +313,14 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: jg9lyv4mg + job_id: j5wed4j45 job_status: Passed torchscript_onnx_qnn: - inference_time: 714713.0 - throughput: 1.3991630206810286 + inference_time: 724601.0 + throughput: 1.3800698591362695 estimated_peak_memory_range: - min: 876544 - max: 31904824 + min: 528384 + max: 30973328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -364,7 +328,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: j5mn6rz7p + job_id: jpv6173z5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -373,13 +337,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:06:57Z' + timestamp: '2024-11-09T21:58:27Z' + - torchscript_onnx_tflite: + inference_time: 658817.0 + throughput: 1.517872186054701 + estimated_peak_memory_range: + min: 113360896 + max: 146778128 + 
primary_compute_unit: GPU + precision: fp16 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 900 + layers_on_cpu: 11 + total_layers: 911 + job_id: jp14d6rnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 727239.0 + throughput: 1.375063768582268 + estimated_peak_memory_range: + min: 2994176 + max: 8609984 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1329 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1329 + job_id: jpedry985 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-11-09T21:58:29Z' - torchscript_onnx_tflite: - inference_time: 984007.0 - throughput: 1.0162529331600283 + inference_time: 972067.0 + throughput: 1.0287356735698259 estimated_peak_memory_range: - min: 48906240 - max: 150879280 + min: 78856192 + max: 178133312 primary_compute_unit: GPU precision: fp16 layer_info: @@ -387,7 +389,7 @@ models: layers_on_gpu: 900 layers_on_cpu: 11 total_layers: 911 - job_id: j57y0wrv5 + job_id: j57yj9qn5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -396,13 +398,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:11:02Z' + timestamp: '2024-11-09T21:58:31Z' - torchscript_onnx_qnn: - inference_time: 525786.0 - throughput: 1.9019144671025854 + inference_time: 526155.0 + throughput: 1.900580627381665 estimated_peak_memory_range: - min: 483328 - max: 483328 + min: 487424 + max: 487424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -410,14 +412,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1329 - job_id: jglv6zmj5 + job_id: jgkel8zvg job_status: Passed torchscript_onnx: - inference_time: 1355760.0 - throughput: 0.7375936743966484 + inference_time: 1356640.0 + throughput: 0.7371152258521052 estimated_peak_memory_range: - min: 469606400 - max: 469606400 + min: 470343680 + max: 470343680 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -425,7 +427,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jp2k06krp + job_id: jgn69k2j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -434,15 +436,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:11:12Z' + timestamp: '2024-11-09T21:58:39Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 25460.0 - throughput: 39.27729772191673 + inference_time: 25328.0 + throughput: 39.481996209728365 estimated_peak_memory_range: - min: 16424960 - max: 20186104 + min: 16769024 + max: 19702472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -450,14 +452,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: j5q6e8onp + job_id: jp8q31lqp job_status: Passed torchscript_onnx_qnn: - inference_time: 11992.0 - throughput: 83.38892595063376 + inference_time: 12008.0 + throughput: 83.27781479013991 estimated_peak_memory_range: - min: 63565824 - max: 135579656 + min: 63447040 + max: 136635832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -465,14 +467,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jprv8q30g + job_id: j5mnw8j7p job_status: Passed torchscript_onnx: - inference_time: 56892.0 - throughput: 17.577163748857483 + inference_time: 57142.0 + throughput: 17.50026250393756 estimated_peak_memory_range: - min: 125771776 - max: 128531536 + min: 127242240 + max: 130025896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -480,7 +482,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jp4lkol85 + job_id: jgdxr2w6p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -489,13 +491,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:11:06Z' + timestamp: '2024-11-09T21:58:34Z' - torchscript_onnx_tflite: - 
inference_time: 25537.0 - throughput: 39.15886752555116 + inference_time: 19228.0 + throughput: 52.00748907842729 estimated_peak_memory_range: - min: 16830464 - max: 19474920 + min: 33447936 + max: 1199072864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -503,14 +505,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: j56ye6l6p + job_id: j5q67vjep job_status: Passed torchscript_onnx_qnn: - inference_time: 11771.0 - throughput: 84.95454931611587 + inference_time: 9452.0 + throughput: 105.79771476936098 estimated_peak_memory_range: - min: 63619072 - max: 133090808 + min: 62328832 + max: 165690640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -518,37 +520,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jpy1rw38p - job_status: Passed - torchscript_onnx: - inference_time: 56196.0 - throughput: 17.7948608441882 - estimated_peak_memory_range: - min: 127221760 - max: 129746480 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: j5mnq2ndp + job_id: jprv4wzkg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:11:08Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:58:36Z' - torchscript_onnx_tflite: - inference_time: 19467.0 - throughput: 51.36898340781836 + inference_time: 16628.0 + throughput: 60.13952369497233 estimated_peak_memory_range: - min: 16781312 - max: 1182804848 + min: 16338944 + max: 275702160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -556,14 +543,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jgo2kyqqp + job_id: j56y3wknp job_status: Passed torchscript_onnx_qnn: - inference_time: 9374.0 - throughput: 106.67804565820354 + inference_time: 7482.0 + 
throughput: 133.65410318096767 estimated_peak_memory_range: - min: 52133888 - max: 151230592 + min: 52518912 + max: 192034176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -571,14 +558,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jp8q09ykp + job_id: jpy14m90p job_status: Passed torchscript_onnx: - inference_time: 46191.0 - throughput: 21.649239029248122 + inference_time: 39601.0 + throughput: 25.2518875785965 estimated_peak_memory_range: - min: 100257792 - max: 1649022832 + min: 116068352 + max: 926277936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -586,22 +573,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jprv8qv0g + job_id: j5mnw8r7p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:11:10Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:58:37Z' - torchscript_onnx_tflite: - inference_time: 16519.0 - throughput: 60.53635207942369 + inference_time: 25840.0 + throughput: 38.69969040247678 estimated_peak_memory_range: - min: 15728640 - max: 275848528 + min: 16809984 + max: 19386192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -609,14 +596,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: j5mnq2xdp + job_id: jgo218j1p job_status: Passed torchscript_onnx_qnn: - inference_time: 7481.0 - throughput: 133.6719689881032 + inference_time: 12335.0 + throughput: 81.07012565869478 estimated_peak_memory_range: - min: 57827328 - max: 198213120 + min: 59486208 + max: 60883184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -624,37 +611,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jgdxe9xrp - job_status: Passed - torchscript_onnx: - inference_time: 40763.0 - throughput: 24.532051124794545 - 
estimated_peak_memory_range: - min: 114851840 - max: 926892736 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: jp8q09qkp + job_id: jp8q31oqp job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:11:14Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:58:19Z' - torchscript_onnx_tflite: - inference_time: 25303.0 - throughput: 39.52100541437774 + inference_time: 25356.0 + throughput: 39.43839722353684 estimated_peak_memory_range: - min: 14356480 - max: 17308352 + min: 15597568 + max: 19090296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -662,14 +634,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jgjvzmnvg + job_id: jgjv0qj1g job_status: Passed torchscript_onnx_qnn: - inference_time: 12303.0 - throughput: 81.28098837681866 + inference_time: 12620.0 + throughput: 79.23930269413628 estimated_peak_memory_range: - min: 63676416 - max: 64997624 + min: 66863104 + max: 68265408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -677,22 +649,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j5q6ekqnp + job_id: j56y3w6np job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:10:51Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:58:23Z' - torchscript_onnx_tflite: - inference_time: 24947.0 - throughput: 40.08498015793482 + inference_time: 25355.0 + throughput: 39.43995267205679 estimated_peak_memory_range: - min: 16769024 - max: 20149544 + min: 14774272 + max: 18719312 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -700,14 +672,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jpedw9v85 + job_id: jgz3xn145 job_status: Passed torchscript_onnx_qnn: - inference_time: 12360.0 - throughput: 80.90614886731392 + inference_time: 12690.0 + throughput: 78.80220646178093 estimated_peak_memory_range: - min: 69128192 - max: 70376072 + min: 68390912 + max: 69606488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -715,22 +687,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j57ylzdn5 + job_id: jgo218y1p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:06:53Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:58:25Z' - torchscript_onnx_tflite: - inference_time: 24952.0 - throughput: 40.07694773966015 + inference_time: 24580.0 + throughput: 40.68348250610252 estimated_peak_memory_range: - min: 15122432 - max: 18270984 + min: 15908864 + max: 18622600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -738,14 +710,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: j5we3o945 + job_id: jg9l3d6mg job_status: Passed torchscript_onnx_qnn: - inference_time: 12829.0 - throughput: 77.94839816041781 + inference_time: 12971.0 + throughput: 77.09505820676894 estimated_peak_memory_range: - min: 63700992 - max: 64970008 + min: 63676416 + max: 65020752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -753,22 +725,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jpxk6v185 + job_id: jgjv0qx1g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:06:55Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:58:27Z' - torchscript_onnx_tflite: - 
inference_time: 25800.0 - throughput: 38.75968992248062 + inference_time: 27039.0 + throughput: 36.98361625799771 estimated_peak_memory_range: - min: 16797696 - max: 20721232 + min: 16773120 + max: 255057072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -776,14 +748,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jp14w08np + job_id: jgdxr2j6p job_status: Passed torchscript_onnx_qnn: - inference_time: 12574.0 - throughput: 79.5291872117067 + inference_time: 14311.0 + throughput: 69.87631891551953 estimated_peak_memory_range: - min: 68235264 - max: 74270008 + min: 59531264 + max: 65428016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -791,22 +763,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jgn6m2ej5 + job_id: jgz3xne45 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:06:57Z' + chipset: SA8295P + timestamp: '2024-11-09T21:58:29Z' - torchscript_onnx_tflite: - inference_time: 27021.0 - throughput: 37.008252840383406 + inference_time: 28297.0 + throughput: 35.33943527582429 estimated_peak_memory_range: - min: 16543744 - max: 1157785776 + min: 16838656 + max: 1157693088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -814,14 +786,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jp4lkor85 + job_id: jp4lx3z25 job_status: Passed torchscript_onnx_qnn: - inference_time: 14543.0 - throughput: 68.7616035205941 + inference_time: 15403.0 + throughput: 64.92241771083555 estimated_peak_memory_range: - min: 59359232 - max: 166031360 + min: 59473920 + max: 163105360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -829,7 +801,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jg9lj1lwg + job_id: jg9l3dvmg job_status: Passed reference_device_info: name: QCS8450 (Proxy) 
@@ -838,13 +810,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:11:02Z' + timestamp: '2024-11-09T21:58:32Z' - torchscript_onnx_qnn: - inference_time: 11129.0 - throughput: 89.85533291400844 + inference_time: 10907.0 + throughput: 91.68423947923353 estimated_peak_memory_range: - min: 63700992 - max: 63700992 + min: 63696896 + max: 63696896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -852,14 +824,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j56yej46p + job_id: j5q67v8ep job_status: Passed torchscript_onnx: - inference_time: 48314.0 - throughput: 20.697934346152255 + inference_time: 49551.0 + throughput: 20.181227422251823 estimated_peak_memory_range: - min: 242880512 - max: 242880512 + min: 242929664 + max: 242929664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -867,7 +839,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jpy1rw18p + job_id: jprv4wkkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -876,4 +848,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:11:13Z' + timestamp: '2024-11-09T21:58:39Z' diff --git a/qai_hub_models/models/whisper_tiny_en/README.md b/qai_hub_models/models/whisper_tiny_en/README.md index 7de45b00..738b2ba3 100644 --- a/qai_hub_models/models/whisper_tiny_en/README.md +++ b/qai_hub_models/models/whisper_tiny_en/README.md @@ -5,8 +5,7 @@ OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. 
Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. -This is based on the implementation of Whisper-Tiny-En found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_tiny_en). diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index 7f240d3b..5c6b60e8 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 98253.0 - throughput: 10.17780627563535 + inference_time: 98056.0 + throughput: 10.198254058905116 estimated_peak_memory_range: - min: 19050496 - max: 137606584 + min: 15953920 + max: 138911672 primary_compute_unit: GPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jp0z3yy65 + job_id: jprv4w77g job_status: Passed torchscript_onnx_qnn: - inference_time: 137937.0 - throughput: 7.249686451060992 + inference_time: 141405.0 + throughput: 7.071885718326792 estimated_peak_memory_range: - min: 135168 - max: 49233088 + min: 36864 + max: 53664056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,7 +70,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jpxknvj15 + job_id: jgdxr2yzp job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -77,13 +79,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:56Z' + timestamp: '2024-11-09T21:56:24Z' - torchscript_onnx_tflite: - inference_time: 99266.0 - throughput: 10.073942739709468 + inference_time: 76661.0 + throughput: 13.044442415308957 estimated_peak_memory_range: - min: 21000192 - max: 166527584 + min: 19304448 + max: 48771568 primary_compute_unit: GPU precision: fp16 layer_info: @@ -91,14 +93,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jgke7zz2g + job_id: jpy14mylp job_status: Passed torchscript_onnx_qnn: - inference_time: 135876.0 - throughput: 7.359651446907474 + inference_time: 112275.0 + throughput: 8.90670229347584 estimated_peak_memory_range: - min: 40960 - max: 86463704 + min: 12288 + max: 199506960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,22 +108,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jgn6l2yr5 + job_id: jg9l3d7mg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:58Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:56:26Z' - torchscript_onnx_tflite: - inference_time: 85303.0 - throughput: 11.722917130698804 + inference_time: 78773.0 + throughput: 12.69470503852843 estimated_peak_memory_range: - min: 22573056 - max: 51737280 + min: 19173376 + max: 39494560 primary_compute_unit: GPU precision: fp16 layer_info: @@ -129,14 +131,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jglv6nn85 + job_id: jp8q31kop job_status: Passed torchscript_onnx_qnn: - inference_time: 112196.0 - throughput: 8.91297372455346 + inference_time: 101227.0 + throughput: 9.878787280073498 estimated_peak_memory_range: - min: 16384 - max: 194979168 + 
min: 0 + max: 203490080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -144,22 +146,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jp2k0864p + job_id: jgdxr2y6p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:09:00Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:56:27Z' - torchscript_onnx_tflite: - inference_time: 78202.0 - throughput: 12.78739674177131 + inference_time: 102800.0 + throughput: 9.727626459143968 estimated_peak_memory_range: - min: 21049344 - max: 40819152 + min: 17108992 + max: 62755856 primary_compute_unit: GPU precision: fp16 layer_info: @@ -167,14 +169,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: j57y0zwl5 + job_id: j5q67vdop job_status: Passed torchscript_onnx_qnn: - inference_time: 91183.0 - throughput: 10.966956559885066 + inference_time: 102554.0 + throughput: 9.750960469606257 estimated_peak_memory_range: - min: 8192 - max: 203374528 + min: 237568 + max: 6136008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,22 +184,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: j5we2on65 + job_id: jp4lx3625 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:09:15Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:56:29Z' - torchscript_onnx_tflite: - inference_time: 100912.0 - throughput: 9.90962422704931 + inference_time: 122669.0 + throughput: 8.152018847467575 estimated_peak_memory_range: - min: 16580608 - max: 47343344 + min: 15433728 + max: 126636488 primary_compute_unit: GPU precision: fp16 layer_info: 
@@ -205,14 +207,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jp3jvkklg + job_id: j56y3w9yp job_status: Passed torchscript_onnx_qnn: - inference_time: 110529.0 - throughput: 9.04739932506401 + inference_time: 104998.0 + throughput: 9.523990933160631 estimated_peak_memory_range: - min: 131072 - max: 10924816 + min: 688128 + max: 2003160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,22 +222,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jp0z3yq65 + job_id: jprv4wmkg job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:09:02Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:56:34Z' - torchscript_onnx_tflite: - inference_time: 97453.0 - throughput: 10.261356756590356 + inference_time: 99384.0 + throughput: 10.061981807936892 estimated_peak_memory_range: - min: 13987840 - max: 66811312 + min: 19021824 + max: 136506432 primary_compute_unit: GPU precision: fp16 layer_info: @@ -243,14 +245,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jp4ldqv25 + job_id: jgo2187kp job_status: Passed torchscript_onnx_qnn: - inference_time: 109251.0 - throughput: 9.153234295338258 + inference_time: 106066.0 + throughput: 9.428091942752626 estimated_peak_memory_range: - min: 647168 - max: 1987224 + min: 167936 + max: 11049808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -258,22 +260,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jpy1je60p + job_id: jpy14mk0p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:05:44Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:56:36Z' - 
torchscript_onnx_tflite: - inference_time: 100249.0 - throughput: 9.975161847000967 + inference_time: 101119.0 + throughput: 9.889338304374053 estimated_peak_memory_range: - min: 12288 - max: 128483488 + min: 16175104 + max: 64815224 primary_compute_unit: GPU precision: fp16 layer_info: @@ -281,14 +283,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: j5mn6r37p + job_id: jgjv0q6eg job_status: Passed torchscript_onnx_qnn: - inference_time: 104163.0 - throughput: 9.600337931895202 + inference_time: 104287.0 + throughput: 9.588922876293307 estimated_peak_memory_range: - min: 249856 - max: 6110200 + min: 237568 + max: 6061496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -296,22 +298,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jp8qmo7qp + job_id: jp8q31nqp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:05:46Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:56:38Z' - torchscript_onnx_tflite: - inference_time: 98313.0 - throughput: 10.171594804349374 + inference_time: 104321.0 + throughput: 9.585797682154121 estimated_peak_memory_range: - min: 1802240 - max: 52134888 + min: 20946944 + max: 42412176 primary_compute_unit: GPU precision: fp16 layer_info: @@ -319,14 +321,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jprv2kekg + job_id: jgz3xnqx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 105132.0 - throughput: 9.511851767302058 + inference_time: 127364.0 + throughput: 7.85151220124996 estimated_peak_memory_range: - min: 110592 - max: 5275472 + min: 692224 + max: 6317648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -334,22 +336,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: j5q6r82ep + job_id: j5q67vnep job_status: Passed reference_device_info: - name: 
SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:05:48Z' + chipset: SA8295P + timestamp: '2024-11-09T21:56:40Z' - torchscript_onnx_tflite: - inference_time: 143543.0 - throughput: 6.966553576280279 + inference_time: 155230.0 + throughput: 6.442053726728081 estimated_peak_memory_range: - min: 21016576 - max: 57039808 + min: 20893696 + max: 58832672 primary_compute_unit: GPU precision: fp16 layer_info: @@ -357,14 +359,14 @@ models: layers_on_gpu: 260 layers_on_cpu: 11 total_layers: 271 - job_id: jp14y0l2p + job_id: jg9l3d78g job_status: Passed torchscript_onnx_qnn: - inference_time: 154739.0 - throughput: 6.462494910785257 + inference_time: 162925.0 + throughput: 6.137793463249961 estimated_peak_memory_range: - min: 0 - max: 205373424 + min: 86016 + max: 206091552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -372,7 +374,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jpede9115 + job_id: jp3j46dmg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -381,13 +383,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:09:13Z' + timestamp: '2024-11-09T21:56:42Z' - torchscript_onnx_qnn: - inference_time: 95112.0 - throughput: 10.513920430650181 + inference_time: 95469.0 + throughput: 10.474604321821744 estimated_peak_memory_range: - min: 512000 - max: 512000 + min: 503808 + max: 503808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -395,7 +397,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 313 - job_id: jgke7zn2g + job_id: j5mnw817p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -404,15 +406,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:09:04Z' + timestamp: '2024-11-09T21:56:32Z' - name: WhisperDecoder performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 3711.0 - throughput: 269.46914578280786 + inference_time: 4304.0 + throughput: 232.34200743494424 estimated_peak_memory_range: - min: 2981888 - max: 5390624 + min: 3006464 + max: 5326800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -420,14 +422,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp8q0ooxp + job_id: jp2k7ezqp job_status: Passed torchscript_onnx_qnn: - inference_time: 2307.0 - throughput: 433.4633723450368 + inference_time: 2254.0 + throughput: 443.6557231588287 estimated_peak_memory_range: - min: 4628480 - max: 15887264 + min: 1708032 + max: 160508336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -435,7 +437,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j5mnqr2wp + job_id: j5wed4045 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -444,13 +446,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:56Z' + timestamp: '2024-11-09T21:56:24Z' - torchscript_onnx_tflite: - inference_time: 3771.0 - throughput: 265.1816494298595 + inference_time: 2841.0 + throughput: 351.98873636043646 estimated_peak_memory_range: - min: 2977792 - max: 5426800 + min: 1351680 + max: 232100128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -458,14 +460,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j5q6e884p + job_id: jp0z16xn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2268.0 - throughput: 440.9171075837742 + inference_time: 1652.0 + throughput: 605.3268765133172 estimated_peak_memory_range: - min: 10625024 - max: 18482576 + min: 4624384 + max: 26236096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -473,22 +475,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jprv8kq9g + job_id: jp14d6knp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy 
S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:58Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:56:26Z' - torchscript_onnx_tflite: - inference_time: 3065.0 - throughput: 326.2642740619902 + inference_time: 2482.0 + throughput: 402.90088638195004 estimated_peak_memory_range: - min: 2166784 - max: 231270432 + min: 8192 + max: 32298592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -496,14 +498,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j56ye660p + job_id: jgkel8kng job_status: Passed torchscript_onnx_qnn: - inference_time: 1627.0 - throughput: 614.6281499692686 + inference_time: 1524.0 + throughput: 656.1679790026246 estimated_peak_memory_range: - min: 4624384 - max: 29594960 + min: 4620288 + max: 29389344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -511,22 +513,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jpy1rew7p + job_id: j57yj91n5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:09:00Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:56:28Z' - torchscript_onnx_tflite: - inference_time: 2337.0 - throughput: 427.89901583226356 + inference_time: 3771.0 + throughput: 265.1816494298595 estimated_peak_memory_range: - min: 8192 - max: 32147232 + min: 2977792 + max: 5138568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -534,14 +536,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp4lkqov5 + job_id: jglv0l9m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1395.0 - throughput: 716.8458781362007 + inference_time: 2363.0 + throughput: 423.1908590774439 estimated_peak_memory_range: - min: 4620288 - max: 29529184 + 
min: 5373952 + max: 6784144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -549,22 +551,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jg9ljvelg + job_id: jpxk7x885 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:09:15Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:56:30Z' - torchscript_onnx_tflite: - inference_time: 4193.0 - throughput: 238.49272597185785 + inference_time: 3667.0 + throughput: 272.7024815925825 estimated_peak_memory_range: - min: 2994176 - max: 5159128 + min: 2977792 + max: 5301520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -572,14 +574,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jgo2kyyxp + job_id: jp3j46lng job_status: Passed torchscript_onnx_qnn: - inference_time: 2219.0 - throughput: 450.6534474988734 + inference_time: 2217.0 + throughput: 451.05999097880016 estimated_peak_memory_range: - min: 4653056 - max: 7279632 + min: 4648960 + max: 5995000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -587,22 +589,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jp8q0o9xp + job_id: jp2k7eq6p job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:09:02Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:56:35Z' - torchscript_onnx_tflite: - inference_time: 3758.0 - throughput: 266.0989888238425 + inference_time: 3725.0 + throughput: 268.4563758389262 estimated_peak_memory_range: - min: 3010560 - max: 5030648 + min: 3510272 + max: 5892336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -610,14 +612,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 557 - job_id: jpxk6vy85 + job_id: jpv617yr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2236.0 - throughput: 447.2271914132379 + inference_time: 2262.0 + throughput: 442.0866489832007 estimated_peak_memory_range: - min: 8839168 - max: 10692144 + min: 9277440 + max: 12652992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -625,22 +627,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jp0z2yr05 + job_id: jp0z16w05 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:05:45Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:56:37Z' - torchscript_onnx_tflite: - inference_time: 3633.0 - throughput: 275.2546105147261 + inference_time: 3680.0 + throughput: 271.7391304347826 estimated_peak_memory_range: - min: 2981888 - max: 5456640 + min: 2990080 + max: 5068440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -648,14 +650,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jgn6m23j5 + job_id: jpedry0v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2241.0 - throughput: 446.2293618920125 + inference_time: 2266.0 + throughput: 441.306266548985 estimated_peak_memory_range: - min: 3301376 - max: 4564032 + min: 2629632 + max: 4530656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -663,22 +665,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jgkeqzyvg + job_id: jgkel81vg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:05:46Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:56:39Z' - torchscript_onnx_tflite: - inference_time: 4320.0 - throughput: 231.4814814814815 + inference_time: 4786.0 + 
throughput: 208.94274968658587 estimated_peak_memory_range: - min: 2981888 - max: 4968808 + min: 2973696 + max: 30581968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -686,14 +688,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp2k98l6p + job_id: j5wed40m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2257.0 - throughput: 443.06601683650865 + inference_time: 3334.0 + throughput: 299.9400119976005 estimated_peak_memory_range: - min: 9289728 - max: 10608312 + min: 9748480 + max: 15372496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -701,22 +703,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jglv2nk25 + job_id: jglv0ld25 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:05:48Z' + chipset: SA8295P + timestamp: '2024-11-09T21:56:41Z' - torchscript_onnx_tflite: - inference_time: 4209.0 - throughput: 237.58612497030174 + inference_time: 4332.0 + throughput: 230.84025854108955 estimated_peak_memory_range: - min: 2973696 - max: 227566640 + min: 3002368 + max: 227451488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -724,14 +726,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jgdxew9ep + job_id: jp14d6k7p job_status: Passed torchscript_onnx_qnn: - inference_time: 2642.0 - throughput: 378.5011355034065 + inference_time: 2710.0 + throughput: 369.00369003690037 estimated_peak_memory_range: min: 4624384 - max: 32276464 + max: 29174208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -739,7 +741,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jgz3oe9k5 + job_id: jgo218x1p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -748,10 +750,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: 
'2024-10-26T23:09:13Z' + timestamp: '2024-11-09T21:56:43Z' - torchscript_onnx_qnn: - inference_time: 2086.0 - throughput: 479.3863854266539 + inference_time: 2097.0 + throughput: 476.87172150691464 estimated_peak_memory_range: min: 10629120 max: 10629120 @@ -762,7 +764,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j5q6e8k4p + job_id: jgn69kdj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -771,4 +773,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:09:05Z' + timestamp: '2024-11-09T21:56:33Z' diff --git a/qai_hub_models/models/wideresnet50/README.md b/qai_hub_models/models/wideresnet50/README.md index 30dcddb5..211980c5 100644 --- a/qai_hub_models/models/wideresnet50/README.md +++ b/qai_hub_models/models/wideresnet50/README.md @@ -5,8 +5,7 @@ WideResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of WideResNet50 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/wideresnet50). 
diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index f81508c4..32b7b137 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4877.0 - throughput: 205.0440844781628 + inference_time: 4873.0 + throughput: 205.21239482864766 estimated_peak_memory_range: - min: 16384 - max: 2468560 + min: 28672 + max: 1758344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5mnqrjwp + job_id: jp4lx3415 job_status: Passed torchscript_onnx_qnn: - inference_time: 5702.0 - throughput: 175.377060680463 + inference_time: 5699.0 + throughput: 175.4693805930865 estimated_peak_memory_range: - min: 622592 - max: 237084808 + min: 618496 + max: 373022328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jglv6nj85 + job_id: jgkel80ng job_status: Passed torchscript_onnx: - inference_time: 5199.0 - throughput: 192.34468166955185 + inference_time: 5221.0 + throughput: 191.5341888527102 estimated_peak_memory_range: - min: 634880 - max: 2638872 + min: 12288 + max: 169259504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp14y002p + job_id: j5wed4rm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:13Z' + 
timestamp: '2024-11-09T21:55:39Z' - torchscript_onnx_tflite: - inference_time: 4883.0 - throughput: 204.7921359819783 + inference_time: 3583.0 + throughput: 279.09572983533354 estimated_peak_memory_range: - min: 20480 - max: 2030368 + min: 16384 + max: 107663520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgn6l2jr5 + job_id: jpxk7xrl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5700.0 - throughput: 175.43859649122808 + inference_time: 4182.0 + throughput: 239.12003825920613 estimated_peak_memory_range: min: 618496 - max: 373527128 + max: 30381808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j56ye6k0p + job_id: j5q67v1op job_status: Passed torchscript_onnx: - inference_time: 5155.0 - throughput: 193.98642095053347 + inference_time: 3841.0 + throughput: 260.34886748242644 estimated_peak_memory_range: - min: 630784 - max: 3090248 + min: 618496 + max: 112608336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgdxewwep + job_id: jg9l3dq8g job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:08:14Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:55:40Z' - torchscript_onnx_tflite: - inference_time: 3989.0 - throughput: 250.68939583855604 + inference_time: 3410.0 + throughput: 293.2551319648094 estimated_peak_memory_range: - min: 0 - max: 106695168 + min: 12288 + max: 33795312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jprv8kz9g + job_id: j5mnw8k9p job_status: Passed 
torchscript_onnx_qnn: - inference_time: 4782.0 - throughput: 209.11752404851526 + inference_time: 4100.0 + throughput: 243.90243902439025 estimated_peak_memory_range: - min: 618496 - max: 28017472 + min: 0 + max: 27022000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp3jvkylg + job_id: jglv0lqm5 job_status: Passed torchscript_onnx: - inference_time: 4222.0 - throughput: 236.85457129322597 + inference_time: 3601.0 + throughput: 277.700638711469 estimated_peak_memory_range: min: 0 - max: 111699312 + max: 38128112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j57y0zzl5 + job_id: jp14d6m7p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:08:15Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:55:41Z' - torchscript_onnx_tflite: - inference_time: 3407.0 - throughput: 293.51335485764605 + inference_time: 4859.0 + throughput: 205.80366330520684 estimated_peak_memory_range: - min: 12288 - max: 33882128 + min: 32768 + max: 2115424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5q6e8j4p + job_id: jgn69kqq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3982.0 - throughput: 251.13008538422903 + inference_time: 5016.0 + throughput: 199.36204146730464 estimated_peak_memory_range: - min: 0 - max: 27235040 + min: 626688 + max: 1920360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9ljvvlg - job_status: Passed - torchscript_onnx: - inference_time: 
3562.0 - throughput: 280.74115665356544 - estimated_peak_memory_range: - min: 0 - max: 38459568 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 128 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 128 - job_id: jpxknvv15 + job_id: j56y3w0yp job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:08:17Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:55:32Z' - torchscript_onnx_tflite: - inference_time: 4863.0 - throughput: 205.63438206868187 + inference_time: 4869.0 + throughput: 205.38098172109264 estimated_peak_memory_range: - min: 28672 - max: 2292520 + min: 16384 + max: 1908840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp2k0824p + job_id: jprv4wd7g job_status: Passed torchscript_onnx_qnn: - inference_time: 5038.0 - throughput: 198.4914648670107 + inference_time: 5040.0 + throughput: 198.4126984126984 estimated_peak_memory_range: - min: 634880 - max: 1921904 + min: 655360 + max: 2015088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2kyjxp + job_id: jgo2189kp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:08:06Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:55:34Z' - torchscript_onnx_tflite: - inference_time: 4866.0 - throughput: 205.5076037813399 + inference_time: 4863.0 + throughput: 205.63438206868187 estimated_peak_memory_range: - min: 28672 - max: 2042600 + min: 24576 + max: 2244176 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgz3jerx5 + job_id: jp2k7edqp job_status: Passed torchscript_onnx_qnn: - inference_time: 5031.0 - throughput: 198.76764062810574 + inference_time: 5039.0 + throughput: 198.45207382417146 estimated_peak_memory_range: - min: 319488 - max: 1686256 + min: 647168 + max: 1834344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14w0e7p + job_id: jpv617nr5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:05:15Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:55:35Z' - torchscript_onnx_tflite: - inference_time: 4875.0 - throughput: 205.12820512820514 + inference_time: 4873.0 + throughput: 205.21239482864766 estimated_peak_memory_range: - min: 28672 - max: 2123080 + min: 20480 + max: 2331560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5we3oqm5 + job_id: jpy14m2lp job_status: Passed torchscript_onnx_qnn: - inference_time: 5037.0 - throughput: 198.53087155052611 + inference_time: 5042.0 + throughput: 198.33399444664815 estimated_peak_memory_range: - min: 630784 - max: 1888200 + min: 634880 + max: 2219168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdxqwozp + job_id: jgjv0q8eg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:05:17Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:55:36Z' - torchscript_onnx_tflite: - inference_time: 4875.0 - 
throughput: 205.12820512820514 + inference_time: 8247.0 + throughput: 121.25621438098703 estimated_peak_memory_range: min: 20480 - max: 1743320 + max: 23413664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jg9lyvw8g + job_id: jp0z169n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5041.0 - throughput: 198.37333862328904 + inference_time: 8064.0 + throughput: 124.0079365079365 estimated_peak_memory_range: - min: 667648 - max: 2057416 + min: 651264 + max: 6335264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we3oq45 + job_id: jpedrynv5 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:05:17Z' + chipset: SA8295P + timestamp: '2024-11-09T21:55:37Z' - torchscript_onnx_tflite: - inference_time: 7133.0 - throughput: 140.19346698443852 + inference_time: 7135.0 + throughput: 140.1541695865452 estimated_peak_memory_range: min: 20480 - max: 95312720 + max: 94957760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgke7zj2g + job_id: jp8q31rop job_status: Passed torchscript_onnx_qnn: - inference_time: 7242.0 - throughput: 138.08340237503452 + inference_time: 7314.0 + throughput: 136.7240907847963 estimated_peak_memory_range: min: 618496 - max: 23270192 + max: 24182368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we2oo65 + job_id: jgz3xn0x5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: 
'2024-10-26T23:08:11Z' + timestamp: '2024-11-09T21:55:38Z' - torchscript_onnx_qnn: - inference_time: 4906.0 - throughput: 203.8320423970648 + inference_time: 4898.0 + throughput: 204.1649652919559 estimated_peak_memory_range: min: 602112 max: 602112 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv603jj5 + job_id: jp3j46rng job_status: Passed torchscript_onnx: - inference_time: 4669.0 - throughput: 214.17862497322767 + inference_time: 4673.0 + throughput: 213.99529210357372 estimated_peak_memory_range: - min: 139210752 - max: 139210752 + min: 139370496 + max: 139370496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp4lkqqv5 + job_id: jgdxr2mzp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:08:16Z' + timestamp: '2024-11-09T21:55:42Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/README.md b/qai_hub_models/models/wideresnet50_quantized/README.md index a64ee173..6950b4fb 100644 --- a/qai_hub_models/models/wideresnet50_quantized/README.md +++ b/qai_hub_models/models/wideresnet50_quantized/README.md @@ -5,8 +5,7 @@ WideResNet50 is a machine learning model that can classify images from the Imagenet dataset. It can also be used as a backbone in building more complex models for specific use cases. -This is based on the implementation of WideResNet50-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/wideresnet50_quantized). 
diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index 9fe321a0..b82338d4 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -49,64 +49,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1773.0 - throughput: 564.0157924421884 - estimated_peak_memory_range: - min: 16384 - max: 2407536 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 82 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 82 - job_id: jglv24l25 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2024.0 - throughput: 494.0711462450593 - estimated_peak_memory_range: - min: 12288 - max: 135188344 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 127 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 127 - job_id: j57yl29n5 - job_status: Passed - torchscript_onnx: - inference_time: 2477.0 - throughput: 403.7141703673799 - estimated_peak_memory_range: - min: 16384 - max: 86171496 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jglv24x25 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:37:28Z' - - torchscript_onnx_tflite: - inference_time: 1779.0 - throughput: 562.1135469364812 + inference_time: 1778.0 + throughput: 562.429696287964 estimated_peak_memory_range: min: 24576 - max: 1994440 + max: 10847696 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j56yz2wnp + job_id: jgn69w3v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2044.0 - throughput: 489.23679060665364 
+ inference_time: 2039.0 + throughput: 490.43648847474253 estimated_peak_memory_range: - min: 16384 - max: 144367368 + min: 36864 + max: 8914616 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,22 +76,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp4ldn325 - job_status: Passed - torchscript_onnx: - inference_time: 2494.0 - throughput: 400.962309542903 - estimated_peak_memory_range: - min: 12288 - max: 85976608 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: j56yz27np + job_id: jgo21934p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +85,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-27T00:37:30Z' + timestamp: '2024-11-09T23:26:30Z' - torchscript_onnx_tflite: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1351.0 + throughput: 740.1924500370096 estimated_peak_memory_range: - min: 12288 - max: 60677376 + min: 20480 + max: 61776240 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +99,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp3j1n6mg + job_id: jprv47evg job_status: Passed torchscript_onnx_qnn: - inference_time: 1533.0 - throughput: 652.3157208088714 + inference_time: 1523.0 + throughput: 656.5988181221273 estimated_peak_memory_range: - min: 167936 - max: 20254304 + min: 0 + max: 20432576 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +114,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk69x85 - job_status: Passed - torchscript_onnx: - inference_time: 1790.0 - throughput: 558.659217877095 - estimated_peak_memory_range: - min: 28672 - max: 91978592 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jp3j1n9mg + job_id: 
jpv61nv75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +123,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-27T00:37:31Z' + timestamp: '2024-11-09T23:26:31Z' - torchscript_onnx_tflite: inference_time: 1249.0 throughput: 800.640512409928 estimated_peak_memory_range: min: 8192 - max: 24995952 + max: 24574400 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +137,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp14w26np + job_id: jp2k7zlxp job_status: Passed torchscript_onnx_qnn: - inference_time: 1495.0 - throughput: 668.8963210702341 + inference_time: 1343.0 + throughput: 744.6016381236038 estimated_peak_memory_range: min: 0 - max: 18498368 + max: 19206112 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +152,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgkeqv4vg - job_status: Passed - torchscript_onnx: - inference_time: 1657.0 - throughput: 603.5003017501509 - estimated_peak_memory_range: - min: 0 - max: 42819504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jpv6rqdz5 + job_id: jgjv08e7g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +161,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-27T00:37:34Z' + timestamp: '2024-11-09T23:26:33Z' - torchscript_onnx_tflite: - inference_time: 7784.0 - throughput: 128.46865364850976 + inference_time: 7797.0 + throughput: 128.25445684237528 estimated_peak_memory_range: min: 12288 - max: 30696256 + max: 29917600 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +175,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgo2nz81p + job_id: jpy14y6rp job_status: Passed torchscript_onnx_qnn: - inference_time: 9329.0 - throughput: 
107.19262514738986 + inference_time: 9819.0 + throughput: 101.84336490477645 estimated_peak_memory_range: - min: 163840 - max: 7940512 + min: 249856 + max: 8473376 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +190,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mn6e87p + job_id: jpedrnk75 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +199,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-27T00:37:12Z' + timestamp: '2024-11-09T23:26:16Z' - torchscript_onnx_tflite: - inference_time: 23763.0 - throughput: 42.08222867483062 + inference_time: 23983.0 + throughput: 41.69620147604553 estimated_peak_memory_range: - min: 53248 - max: 7683488 + min: 221184 + max: 6508496 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +213,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpv6rq7z5 + job_id: jp0z1xl25 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +222,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-27T00:36:52Z' + timestamp: '2024-11-09T23:25:57Z' - torchscript_onnx_tflite: - inference_time: 1781.0 - throughput: 561.4823133071309 + inference_time: 1776.0 + throughput: 563.063063063063 estimated_peak_memory_range: min: 28672 - max: 25200968 + max: 429131904 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +236,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgjv2dq1g + job_id: jp8q3kzzp job_status: Passed torchscript_onnx_qnn: inference_time: 1916.0 throughput: 521.9206680584551 estimated_peak_memory_range: - min: 192512 - max: 1879744 + min: 180224 + max: 1482208 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +251,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6m0kj5 + job_id: jgz3x0rz5 job_status: Passed reference_device_info: name: QCS8550 
(Proxy) @@ -358,13 +260,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-27T00:37:13Z' + timestamp: '2024-11-09T23:26:17Z' - torchscript_onnx_tflite: - inference_time: 1774.0 - throughput: 563.6978579481398 + inference_time: 1782.0 + throughput: 561.1672278338945 estimated_peak_memory_range: - min: 40960 - max: 39391872 + min: 24576 + max: 2601992 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +274,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpxk6jq15 + job_id: jgkelk3yg job_status: Passed torchscript_onnx_qnn: - inference_time: 1921.0 - throughput: 520.5622071837585 + inference_time: 1922.0 + throughput: 520.2913631633714 estimated_peak_memory_range: - min: 204800 - max: 1551000 + min: 188416 + max: 1319880 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp2k9614p + job_id: jg9l3qwqg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T02:01:13Z' + timestamp: '2024-11-09T23:26:21Z' - torchscript_onnx_tflite: - inference_time: 1770.0 - throughput: 564.9717514124294 + inference_time: 1774.0 + throughput: 563.6978579481398 estimated_peak_memory_range: min: 16384 - max: 2316328 + max: 29894128 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +312,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j5mn627wp + job_id: j5q67d37p job_status: Passed torchscript_onnx_qnn: - inference_time: 2030.0 - throughput: 492.61083743842363 + inference_time: 1927.0 + throughput: 518.9413596263622 estimated_peak_memory_range: - min: 180224 - max: 1544400 + min: 184320 + max: 1615200 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +327,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: 
jpy1jwl7p + job_id: jp14dmekp job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +336,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T02:01:15Z' + timestamp: '2024-11-09T23:26:23Z' - torchscript_onnx_tflite: inference_time: 1777.0 throughput: 562.7462014631401 estimated_peak_memory_range: - min: 16384 - max: 2166776 + min: 24576 + max: 38641408 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +350,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jprv2qr9g + job_id: jglv0q3e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1920.0 - throughput: 520.8333333333334 + inference_time: 1918.0 + throughput: 521.376433785193 estimated_peak_memory_range: - min: 180224 - max: 1582008 + min: 184320 + max: 1547992 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +365,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp8qm9nxp + job_id: jgdxrmokp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +374,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T02:01:18Z' + timestamp: '2024-11-09T23:26:24Z' - torchscript_onnx_tflite: - inference_time: 2724.0 - throughput: 367.1071953010279 + inference_time: 2722.0 + throughput: 367.37692872887584 estimated_peak_memory_range: min: 12288 - max: 25004048 + max: 25634464 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +388,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgn6my4r5 + job_id: j56y30nvp job_status: Passed torchscript_onnx_qnn: - inference_time: 3013.0 - throughput: 331.89512114171924 + inference_time: 3686.0 + throughput: 271.2967986977754 estimated_peak_memory_range: min: 0 - max: 5936192 + max: 5854992 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +403,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: 
jp0z2qw65 + job_id: j57yj8xq5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +412,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T02:01:16Z' + timestamp: '2024-11-09T23:26:26Z' - torchscript_onnx_tflite: - inference_time: 2165.0 - throughput: 461.8937644341801 + inference_time: 2164.0 + throughput: 462.1072088724584 estimated_peak_memory_range: - min: 32768 - max: 63239088 + min: 28672 + max: 63050000 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +426,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jg9ly0dmg + job_id: jp3j4rexg job_status: Passed torchscript_onnx_qnn: - inference_time: 2460.0 - throughput: 406.5040650406504 + inference_time: 2456.0 + throughput: 407.1661237785016 estimated_peak_memory_range: - min: 167936 - max: 24706480 + min: 172032 + max: 21401456 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +441,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp8qm2xqp + job_id: jp4lx2vq5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +450,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-27T00:37:23Z' + timestamp: '2024-11-09T23:26:28Z' - torchscript_onnx_qnn: - inference_time: 1848.0 - throughput: 541.1255411255411 + inference_time: 1861.0 + throughput: 537.345513164965 estimated_peak_memory_range: - min: 258048 - max: 258048 + min: 249856 + max: 249856 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +464,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprv26wkg - job_status: Passed - torchscript_onnx: - inference_time: 2656.0 - throughput: 376.50602409638554 - estimated_peak_memory_range: - min: 74330112 - max: 74330112 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 147 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 147 - job_id: jgo2nzr1p + 
job_id: j5wedrqz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +473,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-27T00:37:33Z' + timestamp: '2024-11-09T23:26:34Z' diff --git a/qai_hub_models/models/xlsr/README.md b/qai_hub_models/models/xlsr/README.md index 53aa6944..84535328 100644 --- a/qai_hub_models/models/xlsr/README.md +++ b/qai_hub_models/models/xlsr/README.md @@ -5,8 +5,7 @@ XLSR is designed for lightweight real-time upscaling of images. -This is based on the implementation of XLSR found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/xlsr). diff --git a/qai_hub_models/models/xlsr/evaluate.py b/qai_hub_models/models/xlsr/evaluate.py new file mode 100644 index 00000000..f218f5b7 --- /dev/null +++ b/qai_hub_models/models/xlsr/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.xlsr import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 735d7c73..4ebd34a4 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2455.0 - throughput: 407.33197556008145 + inference_time: 2476.0 + throughput: 403.8772213247173 estimated_peak_memory_range: - min: 4427776 - max: 11552112 + min: 856064 + max: 9533624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jgdxewkep + job_id: jg9l3do8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1359.0 - throughput: 735.8351729212657 + inference_time: 1360.0 + throughput: 735.2941176470588 estimated_peak_memory_range: - min: 28672 - max: 3184984 + min: 212992 + max: 68334864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp0z3yw65 + job_id: jp2k7eoqp job_status: Passed torchscript_onnx: - inference_time: 1482.0 - throughput: 674.7638326585695 + inference_time: 1504.0 + throughput: 664.8936170212766 estimated_peak_memory_range: - min: 208896 - max: 1830664 + min: 212992 + max: 1507536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jpede9q15 + job_id: jpv617mr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:07:27Z' + timestamp: '2024-11-09T21:54:54Z' - torchscript_onnx_tflite: - inference_time: 2694.0 - throughput: 371.19524870081665 + inference_time: 1692.0 + throughput: 591.016548463357 estimated_peak_memory_range: - min: 32768 - max: 1637216 + min: 20480 + max: 25344640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j57y0zml5 + job_id: jp14d6o7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1349.0 - throughput: 741.2898443291327 + inference_time: 827.0 + throughput: 1209.1898428053205 estimated_peak_memory_range: - min: 16384 - max: 67319576 + min: 0 + max: 10170864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp8q0onxp + job_id: jpy14m8lp job_status: Passed 
torchscript_onnx: - inference_time: 1503.0 - throughput: 665.335994677312 + inference_time: 1044.0 + throughput: 957.8544061302682 estimated_peak_memory_range: min: 0 - max: 1708952 + max: 24077856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jgz3oe6k5 + job_id: jgjv0qyeg job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:07:29Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:54:55Z' - torchscript_onnx_tflite: - inference_time: 1753.0 - throughput: 570.4506560182544 + inference_time: 1553.0 + throughput: 643.915003219575 estimated_peak_memory_range: - min: 20480 - max: 24554192 + min: 12288 + max: 16833472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jp4lkq7v5 + job_id: jgdxr26zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1088.0 - throughput: 919.1176470588235 + inference_time: 792.0 + throughput: 1262.6262626262626 estimated_peak_memory_range: - min: 208896 - max: 12227408 + min: 0 + max: 9618592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgke7z12g + job_id: jp0z16on5 job_status: Passed torchscript_onnx: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1067.0 + throughput: 937.207122774133 estimated_peak_memory_range: min: 0 - max: 24032544 + max: 15098400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: j5we2oj65 + job_id: jpedryxv5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite 
QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:07:30Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:54:56Z' - torchscript_onnx_tflite: - inference_time: 1550.0 - throughput: 645.1612903225806 + inference_time: 2538.0 + throughput: 394.01103230890465 estimated_peak_memory_range: - min: 12288 - max: 16744512 + min: 28672 + max: 8336240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jpy1rel7p + job_id: j57yj9o95 job_status: Passed torchscript_onnx_qnn: - inference_time: 841.0 - throughput: 1189.0606420927468 + inference_time: 1339.0 + throughput: 746.8259895444362 estimated_peak_memory_range: - min: 0 - max: 9539680 + min: 229376 + max: 1955536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgjvzx9xg - job_status: Passed - torchscript_onnx: - inference_time: 1064.0 - throughput: 939.8496240601504 - estimated_peak_memory_range: - min: 0 - max: 15595888 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 23 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 23 - job_id: jp14y0r2p + job_id: jp8q31jop job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:07:32Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:54:46Z' - torchscript_onnx_tflite: - inference_time: 2499.0 - throughput: 400.16006402561027 + inference_time: 2515.0 + throughput: 397.61431411530816 estimated_peak_memory_range: min: 16384 - max: 84255056 + max: 8697112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 3 total_layers: 16 - job_id: jpxknvq15 + job_id: jp4lx3e15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: - min: 233472 - max: 1424224 + min: 229376 + max: 1426064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5q6e8n4p + job_id: j5q67v4op job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:07:20Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:54:48Z' - torchscript_onnx_tflite: - inference_time: 2459.0 - throughput: 406.669377795852 + inference_time: 2607.0 + throughput: 383.5826620636747 estimated_peak_memory_range: - min: 24576 - max: 8372472 + min: 20480 + max: 1474608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jgkeqzqng + job_id: jpxk7x0l5 job_status: Passed torchscript_onnx_qnn: inference_time: 1353.0 throughput: 739.0983000739099 estimated_peak_memory_range: - min: 233472 - max: 1491600 + min: 225280 + max: 1554720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j56yz6nyp + job_id: jglv0l8m5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:04:50Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:54:49Z' - torchscript_onnx_tflite: - inference_time: 2426.0 - throughput: 412.20115416323165 + inference_time: 2586.0 + throughput: 386.69760247486465 
estimated_peak_memory_range: - min: 28672 - max: 8568816 + min: 16384 + max: 9373264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j5q6r8rop + job_id: j5mnw899p job_status: Passed torchscript_onnx_qnn: - inference_time: 1344.0 - throughput: 744.047619047619 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: - min: 229376 - max: 1643504 + min: 225280 + max: 1538416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp3j1keng + job_id: j56y3wmyp job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:04:52Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:54:51Z' - torchscript_onnx_tflite: - inference_time: 2438.0 - throughput: 410.17227235438884 + inference_time: 3916.0 + throughput: 255.3626149131767 estimated_peak_memory_range: - min: 20480 - max: 23504704 + min: 6311936 + max: 22320832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jglv2n3m5 + job_id: jgn69k1q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1338.0 - throughput: 747.3841554559043 + inference_time: 2328.0 + throughput: 429.553264604811 estimated_peak_memory_range: - min: 229376 - max: 1952672 + min: 212992 + max: 6179424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgo2ny3kp + job_id: jp3j467ng job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: 
'2024-10-30T01:04:53Z' + chipset: SA8295P + timestamp: '2024-11-09T21:54:52Z' - torchscript_onnx_tflite: - inference_time: 3135.0 - throughput: 318.9792663476874 + inference_time: 3326.0 + throughput: 300.6614552014432 estimated_peak_memory_range: min: 6311936 - max: 32067120 + max: 32189616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jp2k0814p + job_id: jprv4wx7g job_status: Passed torchscript_onnx_qnn: inference_time: 1553.0 throughput: 643.915003219575 estimated_peak_memory_range: min: 208896 - max: 15726864 + max: 14906544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jpv6038j5 + job_id: jgo218wkp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:07:25Z' + timestamp: '2024-11-09T21:54:53Z' - torchscript_onnx_qnn: - inference_time: 1474.0 - throughput: 678.42605156038 + inference_time: 1523.0 + throughput: 656.5988181221273 estimated_peak_memory_range: min: 237568 max: 237568 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jglv6nd85 + job_id: jgkel86ng job_status: Passed torchscript_onnx: - inference_time: 1528.0 - throughput: 654.4502617801047 + inference_time: 1535.0 + throughput: 651.4657980456026 estimated_peak_memory_range: - min: 8994816 - max: 8994816 + min: 8835072 + max: 8835072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jg9ljv6lg + job_id: jgz3xnyx5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:07:31Z' + timestamp: '2024-11-09T21:54:57Z' 
diff --git a/qai_hub_models/models/xlsr_quantized/README.md b/qai_hub_models/models/xlsr_quantized/README.md index dbc4c468..c5624b58 100644 --- a/qai_hub_models/models/xlsr_quantized/README.md +++ b/qai_hub_models/models/xlsr_quantized/README.md @@ -5,8 +5,7 @@ XLSR is designed for lightweight real-time upscaling of images. -This is based on the implementation of XLSR-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/xlsr_quantized). diff --git a/qai_hub_models/models/xlsr_quantized/conftest.py b/qai_hub_models/models/xlsr_quantized/conftest.py index 9faafe9f..8506cf44 100644 --- a/qai_hub_models/models/xlsr_quantized/conftest.py +++ b/qai_hub_models/models/xlsr_quantized/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.xlsr_quantized import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. @@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/xlsr_quantized/evaluate.py b/qai_hub_models/models/xlsr_quantized/evaluate.py new file mode 100644 index 00000000..5e64c3c5 --- /dev/null +++ b/qai_hub_models/models/xlsr_quantized/evaluate.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. 
DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.xlsr_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["bsd300"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=100, + supported_datasets=SUPPORTED_DATASETS, + is_hub_quantized=True, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index f7d2ef46..a771f7d4 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -13,6 +13,7 @@ from typing import Any, Optional, cast import qai_hub as hub +import torch from qai_hub_models.models.common import ExportResult, TargetRuntime from qai_hub_models.models.xlsr_quantized import Model @@ -22,6 +23,7 @@ get_model_kwargs, ) from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs from qai_hub_models.utils.printing import ( print_inference_metrics, print_on_target_demo_cmd, @@ -31,11 +33,14 @@ can_access_qualcomm_ai_hub, 
export_without_hub_access, ) +from qai_hub_models.utils.quantization import get_calibration_data def export_model( device: Optional[str] = None, chipset: Optional[str] = None, + num_calibration_samples: int = 100, + skip_compiling: bool = False, skip_profiling: bool = False, skip_inferencing: bool = False, skip_downloading: bool = False, @@ -50,13 +55,14 @@ def export_model( This function executes the following recipe: 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference + 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + 3. Compiles the model to an asset that can be run on device + 4. Profiles the model performance on a real device + 5. Inferences the model on sample inputs + 6. Downloads the model asset to the local directory + 7. Summarizes the results from profiling and inference - Each of the last 4 steps can be optionally skipped using the input options. + Each of the last 5 steps can be optionally skipped using the input options. Parameters: device: Device for which to export the model. @@ -64,6 +70,9 @@ def export_model( Defaults to DEFAULT_DEVICE if not specified. chipset: If set, will choose a random device with this chipset. Overrides the `device` argument. + num_calibration_samples: The number of calibration data samples + to use for quantization. + skip_compiling: If set, skips compiling model to format that can run on device. skip_profiling: If set, skips profiling of compiled model on real devices. skip_inferencing: If set, skips computing on-device outputs from sample data. skip_downloading: If set, skips downloading of compiled model. 
@@ -79,9 +88,10 @@ def export_model( Returns: A struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. + * A CompileJob object containing metadata about the compile job submitted to hub (None if compiling skipped). * An InferenceJob containing metadata about the inference job (None if inferencing skipped). * A ProfileJob containing metadata about the profile job (None if profiling skipped). + * A QuantizeJob object containing metadata about the quantize job submitted to hub """ model_name = "xlsr_quantized" output_path = Path(output_dir or Path.cwd() / "build" / model_name) @@ -115,26 +125,45 @@ def export_model( ) # Trace the model - source_model = model.convert_to_hub_source_model( - target_runtime, output_path, input_spec + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + print(f"Quantizing model {model_name} with {num_calibration_samples} samples.") + # 2. Converts the PyTorch model to ONNX and quantizes the ONNX model. + onnx_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options="--target_runtime onnx", + ) + quantize_job = hub.submit_quantize_job( + model=onnx_compile_job.get_target_model(), + calibration_data=get_calibration_data( + input_spec, "bsd300", num_calibration_samples + ), + weights_dtype=model.get_weights_dtype(), + activations_dtype=model.get_activations_dtype(), + name=model_name, + options=model.get_quantize_options(), ) + if skip_compiling: + return ExportResult(quantize_job=quantize_job) - # 2. Compiles the model to an asset that can be run on device + # 3. 
Compiles the model to an asset that can be run on device model_compile_options = model.get_hub_compile_options( target_runtime, compile_options, hub_device ) print(f"Optimizing model {model_name} to run on-device") submitted_compile_job = hub.submit_compile_job( - model=source_model, + model=quantize_job.get_target_model(), input_specs=input_spec, device=hub_device, name=model_name, - calibration_data=model.get_calibration_data(target_runtime), options=model_compile_options, ) compile_job = cast(hub.client.CompileJob, submitted_compile_job) - # 3. Profiles the model performance on a real device + # 4. Profiles the model performance on a real device profile_job: Optional[hub.client.ProfileJob] = None if not skip_profiling: profile_options_all = model.get_hub_profile_options( @@ -149,7 +178,7 @@ def export_model( ) profile_job = cast(hub.client.ProfileJob, submitted_profile_job) - # 4. Inferences the model on sample inputs + # 5. Inferences the model on sample inputs inference_job: Optional[hub.client.InferenceJob] = None if not skip_inferencing: profile_options_all = model.get_hub_profile_options( @@ -170,13 +199,13 @@ def export_model( ) inference_job = cast(hub.client.InferenceJob, submitted_inference_job) - # 5. Downloads the model asset to the local directory + # 6. Downloads the model asset to the local directory if not skip_downloading: os.makedirs(output_path, exist_ok=True) target_model: hub.Model = compile_job.get_target_model() # type: ignore target_model.download(str(output_path / model_name)) - # 6. Summarizes the results from profiling and inference + # 7. 
Summarizes the results from profiling and inference if not skip_summary and not skip_profiling: assert profile_job is not None and profile_job.wait().success profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore @@ -201,12 +230,13 @@ def export_model( compile_job=compile_job, inference_job=inference_job, profile_job=profile_job, + quantize_job=quantize_job, ) def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model) + parser = export_parser(model_cls=Model, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/xlsr_quantized/model.py b/qai_hub_models/models/xlsr_quantized/model.py index c4115c75..91484d7a 100644 --- a/qai_hub_models/models/xlsr_quantized/model.py +++ b/qai_hub_models/models/xlsr_quantized/model.py @@ -4,77 +4,11 @@ # --------------------------------------------------------------------- from __future__ import annotations -# isort: off -# This verifies aimet is installed, and this must be included first. 
-from qai_hub_models.utils.quantization_aimet import ( - AIMETQuantizableMixin, - constrain_quantized_inputs_to_image_range, -) - -# isort: on -import torch -from aimet_torch.cross_layer_equalization import equalize_model -from aimet_torch.model_preparer import prepare_model -from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim - -from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.xlsr.model import XLSR -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset +from qai_hub_models.utils.quantization import HubQuantizableMixin MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 -DEFAULT_ENCODINGS = "xlsr_quantized_encodings.json" - - -class XLSRQuantizable(AIMETQuantizableMixin, XLSR): - """XLSR with post training quantization suport - - Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. - Support for quantizing using your own weights & data will come at a later date.""" - - def __init__( - self, - xlsr_model: QuantizationSimModel, - scale_factor: int, - ) -> None: - XLSR.__init__(self, xlsr_model.model, scale_factor) - AIMETQuantizableMixin.__init__(self, xlsr_model) - - @classmethod - def from_pretrained( - cls, - aimet_encodings: str | None = "DEFAULT", - scale_factor: int = DEFAULT_SCALE_FACTOR, - ) -> XLSRQuantizable: - """ - Parameters: - aimet_encodings: - if "DEFAULT": Loads the model with aimet encodings calibrated on BSD300. - elif None: Doesn't load any encodings. Used when computing encodings. - else: Interprets as a filepath and loads the encodings stored there. 
- """ - fp16_model = XLSR.from_pretrained(scale_factor) - input_shape = cls.get_input_spec()["image"][0] - - model = prepare_model(fp16_model) - equalize_model(model, input_shape) - - sim = QuantizationSimModel( - model, - quant_scheme="tf_enhanced", - default_param_bw=8, - default_output_bw=8, - config_file=get_default_aimet_config(), - dummy_input=torch.rand(input_shape), - ) - constrain_quantized_inputs_to_image_range(sim) - if aimet_encodings: - if aimet_encodings == "DEFAULT": - aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS - ).fetch() - load_encodings_to_sim(sim, aimet_encodings) - return cls(sim, scale_factor) +class XLSRQuantizable(HubQuantizableMixin, XLSR): + pass diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index a4db7684..12159898 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -49,11 +49,11 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 24576 - max: 1520408 + min: 12288 + max: 1482384 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpede9n05 + job_id: jg9l3qjqg job_status: Passed torchscript_onnx_qnn: - inference_time: 658.0 - throughput: 1519.756838905775 + inference_time: 652.0 + throughput: 1533.7423312883436 estimated_peak_memory_range: - min: 16384 - max: 3265184 + min: 0 + max: 10693104 primary_compute_unit: NPU precision: int8 layer_info: @@ -76,22 +76,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp2k08qmp + job_id: jp0z1x225 job_status: Passed torchscript_onnx: - inference_time: 689.0 - throughput: 1451.3788098693758 + inference_time: 4078.0 + 
throughput: 245.21824423737127 estimated_peak_memory_range: - min: 65536 - max: 1526672 + min: 11354112 + max: 13084280 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 63 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: jpede9005 + layers_on_cpu: 12 + total_layers: 75 + job_id: j5wedr3z5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -100,13 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:06:48Z' + timestamp: '2024-11-09T23:25:09Z' - torchscript_onnx_tflite: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 883.0 + throughput: 1132.5028312570782 estimated_peak_memory_range: min: 16384 - max: 15370408 + max: 22615264 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,67 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgz3oe065 + job_id: jp14dmykp job_status: Passed torchscript_onnx_qnn: - inference_time: 651.0 - throughput: 1536.0983102918588 - estimated_peak_memory_range: - min: 16384 - max: 2948640 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jpy1rek4p - job_status: Passed - torchscript_onnx: - inference_time: 707.0 - throughput: 1414.4271570014143 + inference_time: 448.0 + throughput: 2232.1428571428573 estimated_peak_memory_range: - min: 65536 - max: 1444024 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 19 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: jgz3oeq65 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:06:49Z' - - torchscript_onnx_tflite: - inference_time: 889.0 - throughput: 1124.859392575928 - estimated_peak_memory_range: - min: 24576 
- max: 23547520 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 16 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 19 - job_id: j5we2orj5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 451.0 - throughput: 2217.2949002217297 - estimated_peak_memory_range: - min: 61440 - max: 12105296 + min: 0 + max: 15489248 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,22 +129,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp0z3y8e5 + job_id: jp8q3kmzp job_status: Passed torchscript_onnx: - inference_time: 506.0 - throughput: 1976.2845849802372 + inference_time: 3038.0 + throughput: 329.1639236339697 estimated_peak_memory_range: min: 0 - max: 25219712 + max: 36353552 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 63 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: j5we2o0j5 + layers_on_cpu: 12 + total_layers: 75 + job_id: jg9l3qyqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +153,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:06:50Z' + timestamp: '2024-11-09T23:25:12Z' - torchscript_onnx_tflite: - inference_time: 1157.0 - throughput: 864.304235090752 + inference_time: 1063.0 + throughput: 940.7337723424271 estimated_peak_memory_range: - min: 16384 - max: 16771840 + min: 0 + max: 16026496 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgn6l2dm5 + job_id: jgdxrmekp job_status: Passed torchscript_onnx_qnn: - inference_time: 404.0 - throughput: 2475.2475247524753 + inference_time: 484.0 + throughput: 2066.115702479339 estimated_peak_memory_range: min: 57344 - max: 10827520 + max: 10813616 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,22 +182,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jpv603ym5 + 
job_id: jgkelkqyg job_status: Passed torchscript_onnx: - inference_time: 462.0 - throughput: 2164.5021645021643 + inference_time: 3057.0 + throughput: 327.11808963035656 estimated_peak_memory_range: - min: 61440 - max: 16877568 + min: 0 + max: 26586624 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 63 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: jp14y0klp + layers_on_cpu: 12 + total_layers: 75 + job_id: jp14dmwkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:06:52Z' + timestamp: '2024-11-09T23:25:14Z' - torchscript_onnx_tflite: - inference_time: 2182.0 - throughput: 458.29514207149407 + inference_time: 2274.0 + throughput: 439.7537379067722 estimated_peak_memory_range: - min: 16384 - max: 17104816 + min: 12288 + max: 16281840 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jg9ljvqvg + job_id: j57yj80q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1104.0 - throughput: 905.7971014492754 + inference_time: 1123.0 + throughput: 890.4719501335708 estimated_peak_memory_range: - min: 94208 - max: 7873856 + min: 36864 + max: 7330624 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp8q0od8p + job_id: j5q67dr7p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:06:38Z' + timestamp: '2024-11-09T23:24:45Z' - torchscript_onnx_tflite: - inference_time: 15902.0 - throughput: 62.885171676518674 + inference_time: 16434.0 + throughput: 60.849458439819884 estimated_peak_memory_range: - min: 4251648 - max: 10432624 + min: 4227072 + max: 
14379000 primary_compute_unit: GPU precision: int8 layer_info: @@ -311,7 +258,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 19 - job_id: jp14y0mlp + job_id: jp4lx2kq5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:06:27Z' + timestamp: '2024-11-09T23:24:17Z' - torchscript_onnx_tflite: - inference_time: 1064.0 - throughput: 939.8496240601504 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 28672 - max: 1385448 + min: 12288 + max: 69342320 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgdxewmlp + job_id: jpxk7znj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 427.0 - throughput: 2341.92037470726 + inference_time: 432.0 + throughput: 2314.814814814815 estimated_peak_memory_range: - min: 20480 - max: 1809008 + min: 0 + max: 1585800 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgke7zwog + job_id: jglv0q2e5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:06:39Z' + timestamp: '2024-11-09T23:24:48Z' - torchscript_onnx_tflite: - inference_time: 1064.0 - throughput: 939.8496240601504 + inference_time: 1106.0 + throughput: 904.1591320072333 estimated_peak_memory_range: min: 16384 - max: 13900624 + max: 18234664 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgdxqwqzp + job_id: j5mnwlqyp job_status: Passed torchscript_onnx_qnn: inference_time: 433.0 throughput: 2309.4688221709007 estimated_peak_memory_range: - min: 73728 - max: 1343608 + min: 81920 + 
max: 1554440 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5mn6r69p + job_id: jp3j4r1xg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:04:32Z' + timestamp: '2024-11-09T23:24:54Z' - torchscript_onnx_tflite: - inference_time: 1051.0 - throughput: 951.4747859181732 + inference_time: 1073.0 + throughput: 931.9664492078285 estimated_peak_memory_range: - min: 28672 - max: 1430944 + min: 12288 + max: 1507024 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j57ylzl95 + job_id: jgn69wmv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 431.0 - throughput: 2320.185614849188 + inference_time: 428.0 + throughput: 2336.448598130841 estimated_peak_memory_range: min: 81920 - max: 2365040 + max: 1435712 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgn6m2mq5 + job_id: jgo219n4p job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:04:33Z' + timestamp: '2024-11-09T23:24:57Z' - torchscript_onnx_tflite: - inference_time: 1072.0 - throughput: 932.8358208955224 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 28672 - max: 1500536 + min: 24576 + max: 1563200 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpxk6v6l5 + job_id: jprv472vg job_status: Passed torchscript_onnx_qnn: - inference_time: 433.0 - throughput: 2309.4688221709007 + inference_time: 430.0 + throughput: 2325.5813953488373 
estimated_peak_memory_range: - min: 77824 - max: 1558056 + min: 81920 + max: 1408160 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp2k989qp + job_id: jpv61nr75 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:04:35Z' + timestamp: '2024-11-09T23:25:00Z' - torchscript_onnx_tflite: - inference_time: 1883.0 - throughput: 531.0674455655868 + inference_time: 1884.0 + throughput: 530.7855626326964 estimated_peak_memory_range: - min: 20480 - max: 15243504 + min: 16384 + max: 15114784 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp4ldqd15 + job_id: jp2k7z9xp job_status: Passed torchscript_onnx_qnn: - inference_time: 916.0 - throughput: 1091.703056768559 + inference_time: 896.0 + throughput: 1116.0714285714287 estimated_peak_memory_range: min: 12288 - max: 5685088 + max: 5893424 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jprv2k27g + job_id: jgjv0827g job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +457,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:04:34Z' + timestamp: '2024-11-09T23:25:03Z' - torchscript_onnx_tflite: - inference_time: 1484.0 - throughput: 673.8544474393531 + inference_time: 1656.0 + throughput: 603.864734299517 estimated_peak_memory_range: - min: 20480 - max: 24065680 + min: 1617920 + max: 25717616 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +471,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j5mnqr1qp + job_id: jpy14yjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 703.0 - throughput: 
1422.475106685633 + inference_time: 723.0 + throughput: 1383.1258644536654 estimated_peak_memory_range: min: 61440 - max: 17160464 + max: 16379360 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgo2ky7dp + job_id: jpedrnw75 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,13 +495,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:06:45Z' + timestamp: '2024-11-09T23:25:06Z' - torchscript_onnx_qnn: - inference_time: 528.0 - throughput: 1893.939393939394 + inference_time: 539.0 + throughput: 1855.287569573284 estimated_peak_memory_range: - min: 286720 - max: 286720 + min: 196608 + max: 196608 primary_compute_unit: NPU precision: int8 layer_info: @@ -562,22 +509,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5q6e8xmp + job_id: j56y30zvp job_status: Passed torchscript_onnx: - inference_time: 786.0 - throughput: 1272.264631043257 + inference_time: 7761.0 + throughput: 128.84937508053085 estimated_peak_memory_range: - min: 3301376 - max: 3301376 + min: 8921088 + max: 8921088 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 63 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: jg9ljv7vg + layers_on_cpu: 12 + total_layers: 75 + job_id: jgdxrmqkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:06:51Z' + timestamp: '2024-11-09T23:25:17Z' diff --git a/qai_hub_models/models/xlsr_quantized/test.py b/qai_hub_models/models/xlsr_quantized/test.py deleted file mode 100644 index 609cda51..00000000 --- a/qai_hub_models/models/xlsr_quantized/test.py +++ /dev/null @@ -1,42 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm 
Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -import numpy as np -import torch - -from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS -from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID -from qai_hub_models.models.xlsr_quantized.demo import main as demo_main -from qai_hub_models.models.xlsr_quantized.model import XLSRQuantizable -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check - -OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "xlsr_demo_output.png" -) - - -@skip_clone_repo_check -def test_task(): - # AIMET Quantization Simulator introduces randomness. Eliminate that for this test. - torch.manual_seed(0) - image = load_image(IMAGE_ADDRESS) - output_image = load_image(OUTPUT_IMAGE_ADDRESS) - model = XLSRQuantizable.from_pretrained() - app = SuperResolutionApp(model=model) - app_output_image = app.upscale_image(image)[0] - - assert_most_close( - np.asarray(app_output_image, dtype=np.float32) / 255, - np.asarray(output_image, dtype=np.float32) / 255, - diff_tol=1e-4, - rtol=0.02, - atol=0.2, - ) - - -@skip_clone_repo_check -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/yolonas/README.md b/qai_hub_models/models/yolonas/README.md index f1467c01..7b19fc1e 100644 --- a/qai_hub_models/models/yolonas/README.md +++ b/qai_hub_models/models/yolonas/README.md @@ -5,8 +5,7 @@ YoloNAS is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of Yolo-NAS found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolonas). diff --git a/qai_hub_models/models/yolonas/evaluate.py b/qai_hub_models/models/yolonas/evaluate.py new file mode 100644 index 00000000..5fa47c55 --- /dev/null +++ b/qai_hub_models/models/yolonas/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolonas import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + 
main() diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index 27e86970..c09b4740 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Yolo-NAS performance_metrics: - torchscript_onnx_tflite: - inference_time: 10903.0 - throughput: 91.71787581399614 + inference_time: 10958.0 + throughput: 91.25752874612155 estimated_peak_memory_range: - min: 12288 - max: 6196680 + min: 69632 + max: 6511680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jp3jvk7zg + job_id: jg9l3dzwg job_status: Passed torchscript_onnx_qnn: - inference_time: 14820.0 - throughput: 67.47638326585695 + inference_time: 15172.0 + throughput: 65.9108884787767 estimated_peak_memory_range: - min: 4952064 - max: 26148800 + min: 4960256 + max: 24353368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgdxew4lp + job_id: jpxk7xll5 job_status: Passed torchscript_onnx: - inference_time: 7782.0 - throughput: 128.5016705217168 + inference_time: 7696.0 + throughput: 129.93762993762994 estimated_peak_memory_range: - min: 192512 - max: 28409400 + min: 20480 + max: 45252672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp8q0ok8p + job_id: jglv0lwm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:06:02Z' + timestamp: 
'2024-11-09T21:54:14Z' - torchscript_onnx_tflite: - inference_time: 10909.0 - throughput: 91.66743056192135 + inference_time: 7343.0 + throughput: 136.1841209314994 estimated_peak_memory_range: - min: 249856 - max: 7311280 + min: 20480 + max: 112242624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgo2kywdp + job_id: jp14d6n8p job_status: Passed torchscript_onnx_qnn: - inference_time: 14860.0 - throughput: 67.29475100942126 + inference_time: 10083.0 + throughput: 99.1768322919766 estimated_peak_memory_range: - min: 4939776 - max: 27525408 + min: 4931584 + max: 38787024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j57y0z8r5 + job_id: j5mnw809p job_status: Passed torchscript_onnx: - inference_time: 7721.0 - throughput: 129.5169019557052 + inference_time: 5262.0 + throughput: 190.04180919802357 estimated_peak_memory_range: - min: 40960 - max: 27007888 + min: 5361664 + max: 119898864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgke7zkog + job_id: j56y3woyp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:06:03Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:54:15Z' - torchscript_onnx_tflite: - inference_time: 7966.0 - throughput: 125.53351744915892 + inference_time: 7792.0 + throughput: 128.33675564681724 estimated_peak_memory_range: - min: 237568 - max: 111037920 + min: 212992 + max: 56544016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jpv603mm5 + job_id: jgdxr2drp 
job_status: Passed torchscript_onnx_qnn: - inference_time: 10841.0 - throughput: 92.2424130615257 + inference_time: 10039.0 + throughput: 99.61151509114454 estimated_peak_memory_range: - min: 4947968 - max: 39346976 + min: 4927488 + max: 34214160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp4lkq2l5 + job_id: jgn69kzq5 job_status: Passed torchscript_onnx: - inference_time: 5948.0 - throughput: 168.12373907195695 + inference_time: 5173.0 + throughput: 193.31142470520007 estimated_peak_memory_range: - min: 3354624 - max: 118615040 + min: 1585152 + max: 57785920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j5q6e8dmp + job_id: jp3j46ong job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:06:04Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:54:16Z' - torchscript_onnx_tflite: - inference_time: 7807.0 - throughput: 128.0901754835404 + inference_time: 10788.0 + throughput: 92.69558769002596 estimated_peak_memory_range: - min: 110592 - max: 56294240 + min: 221184 + max: 6573376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jp14y01lp + job_id: j5wed4lm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10011.0 - throughput: 99.89012086704625 + inference_time: 9522.0 + throughput: 105.01995379122033 estimated_peak_memory_range: - min: 4927488 - max: 34192256 + min: 4993024 + max: 6517296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp0z3yxe5 - 
job_status: Passed - torchscript_onnx: - inference_time: 5191.0 - throughput: 192.64110961279138 - estimated_peak_memory_range: - min: 5337088 - max: 62141216 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 290 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 290 - job_id: j56ye607p + job_id: jprv4wl7g job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:06:06Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:54:06Z' - torchscript_onnx_tflite: - inference_time: 10815.0 - throughput: 92.46417013407304 + inference_time: 10945.0 + throughput: 91.36592051164915 estimated_peak_memory_range: - min: 49152 - max: 27818912 + min: 221184 + max: 6943432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgjvzxy8g + job_id: jg9l3dz8g job_status: Passed torchscript_onnx_qnn: - inference_time: 9500.0 - throughput: 105.26315789473684 + inference_time: 9618.0 + throughput: 103.97171969224371 estimated_peak_memory_range: - min: 4984832 - max: 6345464 + min: 4988928 + max: 6559632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpxknvz95 + job_id: jpy14molp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:05:54Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:54:08Z' - torchscript_onnx_tflite: - inference_time: 10822.0 - throughput: 92.40436148586214 + inference_time: 10927.0 + throughput: 91.51642719868217 estimated_peak_memory_range: - min: 258048 - max: 
7360480 + min: 241664 + max: 7540888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j56yz6zyp + job_id: jp14d6n7p job_status: Passed torchscript_onnx_qnn: - inference_time: 9577.0 - throughput: 104.41683199331732 + inference_time: 9478.0 + throughput: 105.50749103186327 estimated_peak_memory_range: - min: 4968448 - max: 6306368 + min: 5001216 + max: 6330144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpv6r3rr5 + job_id: jp0z16mn5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:04:12Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:54:09Z' - torchscript_onnx_tflite: - inference_time: 10702.0 - throughput: 93.44047841524949 + inference_time: 10917.0 + throughput: 91.60025648071814 estimated_peak_memory_range: - min: 217088 - max: 8970520 + min: 0 + max: 37225864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jp3j1k1ng + job_id: jgdxr2dzp job_status: Passed torchscript_onnx_qnn: - inference_time: 9495.0 - throughput: 105.318588730911 + inference_time: 9574.0 + throughput: 104.44955086693128 estimated_peak_memory_range: - min: 4952064 - max: 6660856 + min: 5197824 + max: 6459664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgjv2x2eg + job_id: jp8q31eop job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:04:13Z' + chipset: SA8650P Proxy + timestamp: 
'2024-11-09T21:54:10Z' - torchscript_onnx_tflite: - inference_time: 10644.0 - throughput: 93.94964299135663 + inference_time: 15661.0 + throughput: 63.852882957665535 estimated_peak_memory_range: - min: 221184 - max: 5884544 + min: 237568 + max: 54932784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgo2nynkp + job_id: j57yj9e95 job_status: Passed torchscript_onnx_qnn: - inference_time: 9503.0 - throughput: 105.2299273913501 + inference_time: 14181.0 + throughput: 70.51688879486638 estimated_peak_memory_range: - min: 4964352 - max: 6234520 + min: 57344 + max: 5638816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgz3jejx5 + job_id: jgkel82ng job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:04:14Z' + chipset: SA8295P + timestamp: '2024-11-09T21:54:12Z' - torchscript_onnx_tflite: - inference_time: 14014.0 - throughput: 71.35721421435707 + inference_time: 13931.0 + throughput: 71.78235589692054 estimated_peak_memory_range: - min: 634880 - max: 107311056 + min: 258048 + max: 108115840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jg9ljv2vg + job_id: jp4lx3y15 job_status: Passed torchscript_onnx_qnn: - inference_time: 18261.0 - throughput: 54.76151360823613 + inference_time: 18521.0 + throughput: 53.99276496949409 estimated_peak_memory_range: - min: 4952064 - max: 35480368 + min: 4931584 + max: 38022448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpy1rey4p + job_id: j5q67vlop job_status: Passed 
reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:06:00Z' + timestamp: '2024-11-09T21:54:13Z' - torchscript_onnx_qnn: - inference_time: 10272.0 - throughput: 97.35202492211837 + inference_time: 10215.0 + throughput: 97.8952520802741 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j5mnqrlqp + job_id: jp2k7erqp job_status: Passed torchscript_onnx: - inference_time: 8345.0 - throughput: 119.83223487118035 + inference_time: 8304.0 + throughput: 120.42389210019267 estimated_peak_memory_range: - min: 22265856 - max: 22265856 + min: 22159360 + max: 22159360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jglv6nql5 + job_id: jgo218dkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:06:05Z' + timestamp: '2024-11-09T21:54:17Z' diff --git a/qai_hub_models/models/yolonas/requirements.txt b/qai_hub_models/models/yolonas/requirements.txt index a7bb275e..4e444e52 100644 --- a/qai_hub_models/models/yolonas/requirements.txt +++ b/qai_hub_models/models/yolonas/requirements.txt @@ -13,3 +13,4 @@ onnxsim<=0.4.36 termcolor<=2.5.0 psutil>6,<7 jsonschema>4,<5 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolonas_quantized/README.md b/qai_hub_models/models/yolonas_quantized/README.md index 0894542c..d3862c42 100644 --- a/qai_hub_models/models/yolonas_quantized/README.md +++ b/qai_hub_models/models/yolonas_quantized/README.md @@ -5,8 +5,7 @@ YoloNAS is a machine learning model that predicts bounding boxes and classes of objects in an image. This model is post-training quantized to int8 using samples from the COCO dataset. 
-This is based on the implementation of Yolo-NAS-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolonas_quantized). diff --git a/qai_hub_models/models/yolonas_quantized/evaluate.py b/qai_hub_models/models/yolonas_quantized/evaluate.py new file mode 100644 index 00000000..5c7dabfb --- /dev/null +++ b/qai_hub_models/models/yolonas_quantized/evaluate.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolonas_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + supports_qnn=False, + supports_onnx=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = 
get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolonas_quantized/export.py b/qai_hub_models/models/yolonas_quantized/export.py index be59eed3..57fe0a8c 100644 --- a/qai_hub_models/models/yolonas_quantized/export.py +++ b/qai_hub_models/models/yolonas_quantized/export.py @@ -210,12 +210,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, - supports_qnn=False, - supports_onnx=False, - supports_precompiled_qnn_onnx=False, - ) + parser = export_parser(model_cls=Model, supports_qnn=False, supports_onnx=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index e062cdfe..68f32bac 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -46,34 +46,11 @@ models: - name: Yolo-NAS-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4771.0 - throughput: 209.5996646405366 + inference_time: 4714.0 + throughput: 212.13406873143828 estimated_peak_memory_range: - min: 81920 - max: 6056176 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 204 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 204 - job_id: jpy1re84p - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 
Gen 2 - timestamp: '2024-10-26T23:04:51Z' - - torchscript_onnx_tflite: - inference_time: 4770.0 - throughput: 209.64360587002096 - estimated_peak_memory_range: - min: 49152 - max: 4727152 + min: 73728 + max: 4920560 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jp0z3yoe5 + job_id: jgjv0q7vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +67,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:04:52Z' + timestamp: '2024-11-09T21:53:05Z' - torchscript_onnx_tflite: - inference_time: 3065.0 - throughput: 326.2642740619902 + inference_time: 3047.0 + throughput: 328.1916639317361 estimated_peak_memory_range: - min: 69632 - max: 85106016 + min: 12288 + max: 85942432 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jp8q0oj8p + job_id: jpedryzo5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -113,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:04:53Z' + timestamp: '2024-11-09T21:53:06Z' - torchscript_onnx_tflite: - inference_time: 2591.0 - throughput: 385.95137012736393 + inference_time: 2601.0 + throughput: 384.46751249519417 estimated_peak_memory_range: min: 8192 - max: 57290352 + max: 57191584 primary_compute_unit: NPU precision: int8 layer_info: @@ -127,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgjvzx38g + job_id: jgz3xnmo5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -136,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:05:02Z' + timestamp: '2024-11-09T21:53:08Z' - torchscript_onnx_tflite: - inference_time: 14710.0 - throughput: 67.98096532970769 + inference_time: 
14746.0 + throughput: 67.81500067815001 estimated_peak_memory_range: min: 61440 - max: 70300256 + max: 69402176 primary_compute_unit: NPU precision: int8 layer_info: @@ -150,7 +127,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgke7z6og + job_id: j5wed4735 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -159,7 +136,7 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:04:54Z' + timestamp: '2024-11-09T21:53:09Z' - reference_device_info: name: RB5 (Proxy) os: '12' @@ -167,13 +144,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:04:56Z' + timestamp: '2024-11-09T21:53:09Z' - torchscript_onnx_tflite: - inference_time: 4697.0 - throughput: 212.90185224611454 + inference_time: 4681.0 + throughput: 213.62956633198036 estimated_peak_memory_range: - min: 73728 - max: 7269464 + min: 69632 + max: 10168376 primary_compute_unit: NPU precision: int8 layer_info: @@ -181,7 +158,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jglv6nwl5 + job_id: jp14d6j8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -190,13 +167,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:04:57Z' + timestamp: '2024-11-09T21:53:11Z' - torchscript_onnx_tflite: - inference_time: 4788.0 - throughput: 208.85547201336675 + inference_time: 4786.0 + throughput: 208.94274968658587 estimated_peak_memory_range: - min: 73728 - max: 7192640 + min: 110592 + max: 27704328 primary_compute_unit: NPU precision: int8 layer_info: @@ -204,7 +181,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgn6m2lq5 + job_id: jgdxr23rp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -213,13 +190,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:03:45Z' + timestamp: '2024-11-09T21:53:12Z' - 
torchscript_onnx_tflite: - inference_time: 4776.0 - throughput: 209.38023450586266 + inference_time: 4771.0 + throughput: 209.5996646405366 estimated_peak_memory_range: - min: 0 - max: 2441040 + min: 98304 + max: 14272504 primary_compute_unit: NPU precision: int8 layer_info: @@ -227,7 +204,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jprv2k87g + job_id: j57yj94v5 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -236,13 +213,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:03:45Z' + timestamp: '2024-11-09T21:53:13Z' - torchscript_onnx_tflite: - inference_time: 4759.0 - throughput: 210.1281781886951 + inference_time: 4792.0 + throughput: 208.6811352253756 estimated_peak_memory_range: - min: 57344 - max: 7108536 + min: 73728 + max: 10914752 primary_compute_unit: NPU precision: int8 layer_info: @@ -250,7 +227,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jpy1jerlp + job_id: jp4lx3185 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -259,13 +236,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:03:47Z' + timestamp: '2024-11-09T21:53:14Z' - torchscript_onnx_tflite: - inference_time: 6495.0 - throughput: 153.96458814472672 + inference_time: 6530.0 + throughput: 153.1393568147014 estimated_peak_memory_range: min: 65536 - max: 56271504 + max: 56281952 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,7 +250,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jp2k980qp + job_id: jpxk7x435 job_status: Passed reference_device_info: name: SA8295P ADP @@ -282,13 +259,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:03:46Z' + timestamp: '2024-11-09T21:53:15Z' - torchscript_onnx_tflite: - inference_time: 5198.0 - throughput: 192.3816852635629 + inference_time: 5164.0 + throughput: 
193.64833462432222 estimated_peak_memory_range: - min: 0 - max: 85858160 + min: 118784 + max: 83579440 primary_compute_unit: NPU precision: int8 layer_info: @@ -296,7 +273,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jpv6032m5 + job_id: j5mnw8mdp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -305,4 +282,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:05:01Z' + timestamp: '2024-11-09T21:53:16Z' diff --git a/qai_hub_models/models/yolonas_quantized/requirements.txt b/qai_hub_models/models/yolonas_quantized/requirements.txt index d670a40e..9bdff789 100644 --- a/qai_hub_models/models/yolonas_quantized/requirements.txt +++ b/qai_hub_models/models/yolonas_quantized/requirements.txt @@ -14,3 +14,4 @@ onnxsim<=0.4.36 termcolor<=2.5.0 psutil>6,<7 jsonschema>4,<5 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolov11_det/README.md b/qai_hub_models/models/yolov11_det/README.md index 99e7da65..fa32ff8f 100644 --- a/qai_hub_models/models/yolov11_det/README.md +++ b/qai_hub_models/models/yolov11_det/README.md @@ -5,8 +5,7 @@ Ultralytics YOLOv11 is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of YOLOv11-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov11_det). 
diff --git a/qai_hub_models/models/yolov11_det/perf.yaml b/qai_hub_models/models/yolov11_det/perf.yaml index bd0279e3..e4414228 100644 --- a/qai_hub_models/models/yolov11_det/perf.yaml +++ b/qai_hub_models/models/yolov11_det/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: YOLOv11-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5537.0 - throughput: 180.60321473722232 + inference_time: 5465.0 + throughput: 182.9826166514181 estimated_peak_memory_range: - min: 258048 - max: 2844880 + min: 241664 + max: 2422000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jpy1reorp + job_id: j56y3wd6p job_status: Passed torchscript_onnx_qnn: - inference_time: 5615.0 - throughput: 178.09439002671417 + inference_time: 5545.0 + throughput: 180.34265103697024 estimated_peak_memory_range: - min: 5017600 - max: 22447864 + min: 4935680 + max: 16296632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jpv603l75 + job_id: jp14d668p job_status: Passed torchscript_onnx: - inference_time: 5970.0 - throughput: 167.50418760469012 + inference_time: 5934.0 + throughput: 168.52039096730704 estimated_peak_memory_range: - min: 4943872 - max: 10364368 + min: 4956160 + max: 10335248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jp14y0nlp + job_id: jp0z16695 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: 
'2024-10-26T23:03:50Z' + timestamp: '2024-11-09T21:52:01Z' - torchscript_onnx_tflite: - inference_time: 5532.0 - throughput: 180.76644974692698 + inference_time: 3876.0 + throughput: 257.99793601651186 estimated_peak_memory_range: - min: 245760 - max: 2625912 + min: 12288 + max: 101331568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jp0z3ym25 + job_id: jp3j46w3g job_status: Passed torchscript_onnx_qnn: - inference_time: 5533.0 - throughput: 180.7337791433219 + inference_time: 3899.0 + throughput: 256.4760194921775 estimated_peak_memory_range: - min: 6275072 - max: 24014600 + min: 4931584 + max: 58992992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgjvzxr7g + job_id: jgdxr22rp job_status: Passed torchscript_onnx: - inference_time: 5941.0 - throughput: 168.32183134152498 + inference_time: 4224.0 + throughput: 236.74242424242425 estimated_peak_memory_range: - min: 6832128 - max: 11531144 + min: 5365760 + max: 125599792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jgdxewdlp + job_id: jp8q311kp job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:03:51Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:52:02Z' - torchscript_onnx_tflite: - inference_time: 3961.0 - throughput: 252.46149962130775 + inference_time: 3175.0 + throughput: 314.96062992125985 estimated_peak_memory_range: - min: 12288 - max: 101644624 + min: 8192 + max: 66074608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: 
jp8q0oezp + job_id: jgo2184qp job_status: Passed torchscript_onnx_qnn: - inference_time: 4714.0 - throughput: 212.13406873143828 + inference_time: 3889.0 + throughput: 257.1355104139882 estimated_peak_memory_range: - min: 4931584 - max: 52581104 + min: 4927488 + max: 51744224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jpede9775 + job_id: j57yj99v5 job_status: Passed torchscript_onnx: - inference_time: 4293.0 - throughput: 232.93733985557884 + inference_time: 3964.0 + throughput: 252.27043390514632 estimated_peak_memory_range: - min: 430080 - max: 121865408 + min: 5369856 + max: 81565056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: j57y0zor5 + job_id: jgkel88wg job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:03:52Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:52:03Z' - torchscript_onnx_tflite: - inference_time: 3845.0 - throughput: 260.0780234070221 + inference_time: 5450.0 + throughput: 183.4862385321101 estimated_peak_memory_range: - min: 212992 - max: 67353392 + min: 217088 + max: 1879336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgo2kyl4p + job_id: jpv6177k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4086.0 - throughput: 244.73813020068528 + inference_time: 5355.0 + throughput: 186.74136321195144 estimated_peak_memory_range: - min: 4927488 - max: 50786864 + min: 4968448 + max: 6197512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
374 - job_id: jg9ljvzvg - job_status: Passed - torchscript_onnx: - inference_time: 3990.0 - throughput: 250.6265664160401 - estimated_peak_memory_range: - min: 5341184 - max: 82109008 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 376 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 376 - job_id: jpxknv095 + job_id: jp4lx3385 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:03:54Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:51:53Z' - torchscript_onnx_tflite: - inference_time: 5442.0 - throughput: 183.75597206909225 + inference_time: 5445.0 + throughput: 183.65472910927457 estimated_peak_memory_range: - min: 229376 - max: 2073248 + min: 40960 + max: 137344616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgke7z2yg + job_id: jgjv0qqvg job_status: Passed torchscript_onnx_qnn: - inference_time: 5374.0 - throughput: 186.08113137327874 + inference_time: 5338.0 + throughput: 187.33608092918695 estimated_peak_memory_range: - min: 4960256 - max: 6136032 + min: 4988928 + max: 6767744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgz3oelz5 + job_id: j5mnw88dp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:03:42Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:51:55Z' - torchscript_onnx_tflite: - inference_time: 5493.0 - throughput: 182.04988166757693 + inference_time: 5459.0 + throughput: 183.18373328448433 
estimated_peak_memory_range: - min: 245760 - max: 2133128 + min: 233472 + max: 2201248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jpedw9ev5 + job_id: jpedryyo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5446.0 - throughput: 183.6210062431142 + inference_time: 5358.0 + throughput: 186.6368047779022 estimated_peak_memory_range: - min: 4976640 - max: 6309088 + min: 4988928 + max: 6741376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jg9lyvj8g + job_id: jgn69kkk5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:02:47Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:51:57Z' - torchscript_onnx_tflite: - inference_time: 5518.0 - throughput: 181.2250815512867 + inference_time: 5446.0 + throughput: 183.6210062431142 estimated_peak_memory_range: - min: 770048 - max: 2757024 + min: 802816 + max: 2690456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgz3jeox5 + job_id: jgz3xnno5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 5423.0 + throughput: 184.39977872026554 estimated_peak_memory_range: min: 5001216 - max: 6299656 + max: 6495824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jp14w0y7p + job_id: jprv4ww0g job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:02:48Z' + chipset: 
SA8650P Proxy + timestamp: '2024-11-09T21:51:58Z' - torchscript_onnx_tflite: - inference_time: 5467.0 - throughput: 182.91567587342234 + inference_time: 10482.0 + throughput: 95.40164090822363 estimated_peak_memory_range: - min: 270336 - max: 15840960 + min: 237568 + max: 61386608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j5we3o2m5 + job_id: j5wed4435 job_status: Passed torchscript_onnx_qnn: - inference_time: 5429.0 - throughput: 184.1959845275373 + inference_time: 9436.0 + throughput: 105.977108944468 estimated_peak_memory_range: - min: 4960256 - max: 6260168 + min: 45056 + max: 5627776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgdxqwezp + job_id: jp2k7eerp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:02:49Z' + chipset: SA8295P + timestamp: '2024-11-09T21:51:59Z' - torchscript_onnx_tflite: - inference_time: 9173.0 - throughput: 109.01558922925979 + inference_time: 9029.0 + throughput: 110.75423634954036 estimated_peak_memory_range: - min: 278528 - max: 98339232 + min: 241664 + max: 95992064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jp3jvkzxg + job_id: jg9l3ddwg job_status: Passed torchscript_onnx_qnn: - inference_time: 8474.0 - throughput: 118.0080245456691 + inference_time: 8512.0 + throughput: 117.4812030075188 estimated_peak_memory_range: - min: 4931584 - max: 39729168 + min: 4952064 + max: 38987936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j5we2olj5 + job_id: jpy14mm8p job_status: 
Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:03:48Z' + timestamp: '2024-11-09T21:52:00Z' - torchscript_onnx_qnn: - inference_time: 5712.0 - throughput: 175.07002801120447 + inference_time: 5710.0 + throughput: 175.13134851138355 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,11 +442,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j5we2olz5 + job_id: jpxk7xx35 job_status: Passed torchscript_onnx: - inference_time: 6772.0 - throughput: 147.66686355581808 + inference_time: 6835.0 + throughput: 146.3057790782736 estimated_peak_memory_range: min: 4931584 max: 4931584 @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jp4lkqel5 + job_id: j5q67vvnp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:03:53Z' + timestamp: '2024-11-09T21:52:04Z' diff --git a/qai_hub_models/models/yolov6/README.md b/qai_hub_models/models/yolov6/README.md index a167d0bc..d5da57f2 100644 --- a/qai_hub_models/models/yolov6/README.md +++ b/qai_hub_models/models/yolov6/README.md @@ -5,8 +5,7 @@ YoloV6 is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of Yolo-v6 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov6). 
diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index cdd94d44..0a318103 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6175.0 - throughput: 161.94331983805668 + inference_time: 6034.0 + throughput: 165.72754391779912 estimated_peak_memory_range: - min: 36864 - max: 3956816 + min: 241664 + max: 4238344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j5mnqr8yp + job_id: jp8q316kp job_status: Passed torchscript_onnx_qnn: - inference_time: 5221.0 - throughput: 191.5341888527102 + inference_time: 5227.0 + throughput: 191.3143294432753 estimated_peak_memory_range: - min: 4939776 - max: 18613904 + min: 5890048 + max: 18271664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jglv6nxe5 + job_id: jpedrylo5 job_status: Passed torchscript_onnx: - inference_time: 6252.0 - throughput: 159.9488163787588 + inference_time: 6073.0 + throughput: 164.66326362588507 estimated_peak_memory_range: - min: 40960 - max: 10253168 + min: 16384 + max: 10372624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp14y0jkp + job_id: jgn69k7k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:03:08Z' + timestamp: 
'2024-11-09T21:51:17Z' - torchscript_onnx_tflite: - inference_time: 6278.0 - throughput: 159.28639694170118 + inference_time: 4172.0 + throughput: 239.69319271332694 estimated_peak_memory_range: - min: 258048 - max: 3395648 + min: 12288 + max: 99320624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jgn6l2nv5 + job_id: jgkel8owg job_status: Passed torchscript_onnx_qnn: - inference_time: 5293.0 - throughput: 188.9287738522577 + inference_time: 3743.0 + throughput: 267.1653753673524 estimated_peak_memory_range: min: 4931584 - max: 15029584 + max: 53112784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j56ye67vp + job_id: jgz3xn4o5 job_status: Passed torchscript_onnx: - inference_time: 6125.0 - throughput: 163.26530612244898 + inference_time: 4209.0 + throughput: 237.58612497030174 estimated_peak_memory_range: - min: 40960 - max: 8879776 + min: 5373952 + max: 118749984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgdxew3kp + job_id: jprv4wn0g job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:03:09Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:51:18Z' - torchscript_onnx_tflite: - inference_time: 4620.0 - throughput: 216.45021645021646 + inference_time: 4342.0 + throughput: 230.30861354214647 estimated_peak_memory_range: - min: 217088 - max: 95707056 + min: 212992 + max: 61154400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jprv8k0vg + job_id: j5q67vznp job_status: Passed 
torchscript_onnx_qnn: - inference_time: 4085.0 - throughput: 244.79804161566707 + inference_time: 4056.0 + throughput: 246.5483234714004 estimated_peak_memory_range: - min: 802816 - max: 49882256 + min: 4927488 + max: 50600272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp3jvk9xg + job_id: j5wed4135 job_status: Passed torchscript_onnx: - inference_time: 4464.0 - throughput: 224.01433691756273 + inference_time: 3474.0 + throughput: 287.85261945883707 estimated_peak_memory_range: - min: 4124672 - max: 114739056 + min: 0 + max: 68982848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j57y0z4q5 + job_id: jp2k7evrp job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:03:10Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:51:19Z' - torchscript_onnx_tflite: - inference_time: 3800.0 - throughput: 263.1578947368421 + inference_time: 5902.0 + throughput: 169.43409013893594 estimated_peak_memory_range: - min: 8192 - max: 61204160 + min: 237568 + max: 222903784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j5q6e8y7p + job_id: jglv0loj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3820.0 - throughput: 261.78010471204186 + inference_time: 5339.0 + throughput: 187.30099269526127 estimated_peak_memory_range: - min: 4927488 - max: 49651120 + min: 5005312 + max: 6214336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jg9ljvmqg - job_status: Passed - 
torchscript_onnx: - inference_time: 4207.0 - throughput: 237.69907297361542 - estimated_peak_memory_range: - min: 5337088 - max: 74457072 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 228 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 228 - job_id: jpxknv4j5 + job_id: jg9l3dxwg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:03:12Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:51:09Z' - torchscript_onnx_tflite: - inference_time: 6228.0 - throughput: 160.56518946692358 + inference_time: 6069.0 + throughput: 164.77179106936893 estimated_peak_memory_range: - min: 221184 - max: 3957496 + min: 24576 + max: 3163824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp2k08wxp + job_id: j56y3wr6p job_status: Passed torchscript_onnx_qnn: - inference_time: 5381.0 - throughput: 185.8390633711206 + inference_time: 5406.0 + throughput: 184.9796522382538 estimated_peak_memory_range: - min: 4980736 - max: 6204504 + min: 4984832 + max: 6846552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgo2kyr4p + job_id: jgdxr2zrp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:03:00Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:51:12Z' - torchscript_onnx_tflite: - inference_time: 6199.0 - throughput: 161.31634134537828 + inference_time: 5986.0 + throughput: 167.0564650851988 estimated_peak_memory_range: - min: 217088 - max: 4314464 + min: 69632 + 
max: 15897352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp8qmo2op + job_id: jp3j46x3g job_status: Passed torchscript_onnx_qnn: - inference_time: 5386.0 - throughput: 185.6665428889714 + inference_time: 5345.0 + throughput: 187.0907390084191 estimated_peak_memory_range: - min: 5013504 - max: 6363512 + min: 4960256 + max: 6753248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jglv2n6m5 + job_id: j57yj97v5 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:02:26Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:51:13Z' - torchscript_onnx_tflite: - inference_time: 6176.0 - throughput: 161.91709844559585 + inference_time: 6120.0 + throughput: 163.3986928104575 estimated_peak_memory_range: - min: 253952 - max: 4164984 + min: 217088 + max: 4057760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jgkeqzvng + job_id: jgo218oqp job_status: Passed torchscript_onnx_qnn: - inference_time: 5343.0 - throughput: 187.16077110237694 + inference_time: 5361.0 + throughput: 186.53236336504384 estimated_peak_memory_range: - min: 4993024 - max: 6329120 + min: 4976640 + max: 6320032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j56yz6eyp + job_id: jp4lx3985 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:02:27Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:51:14Z' - 
torchscript_onnx_tflite: - inference_time: 6177.0 - throughput: 161.89088554314392 + inference_time: 8304.0 + throughput: 120.42389210019267 estimated_peak_memory_range: - min: 184320 - max: 9115472 + min: 217088 + max: 45231552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j5q6r80op + job_id: jpv6179k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5367.0 - throughput: 186.3238308179616 + inference_time: 7733.0 + throughput: 129.315918789603 estimated_peak_memory_range: - min: 4952064 - max: 6821232 + min: 49152 + max: 5641392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp3j1kvng + job_id: jpxk7xd35 job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:02:28Z' + chipset: SA8295P + timestamp: '2024-11-09T21:51:15Z' - torchscript_onnx_tflite: - inference_time: 6998.0 - throughput: 142.89797084881394 + inference_time: 7226.0 + throughput: 138.3891502906172 estimated_peak_memory_range: - min: 270336 - max: 78534768 + min: 421888 + max: 77731472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jgke7z4yg + job_id: jgjv0qwvg job_status: Passed torchscript_onnx_qnn: - inference_time: 6975.0 - throughput: 143.36917562724014 + inference_time: 6927.0 + throughput: 144.36263894904 estimated_peak_memory_range: - min: 4952064 - max: 49600448 + min: 4931584 + max: 39076928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j5we2o7z5 + job_id: j5mnw8ddp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ 
-441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:03:06Z' + timestamp: '2024-11-09T21:51:16Z' - torchscript_onnx_qnn: - inference_time: 5697.0 - throughput: 175.53098121818502 + inference_time: 5767.0 + throughput: 173.40038148083926 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jpv603d75 + job_id: jp14d6v8p job_status: Passed torchscript_onnx: - inference_time: 6328.0 - throughput: 158.02781289506953 + inference_time: 6386.0 + throughput: 156.59254619480112 estimated_peak_memory_range: - min: 6946816 - max: 6946816 + min: 8282112 + max: 8282112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp4lkq1q5 + job_id: jpy14m78p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:03:11Z' + timestamp: '2024-11-09T21:51:20Z' diff --git a/qai_hub_models/models/yolov7/README.md b/qai_hub_models/models/yolov7/README.md index 861ce55b..1c64ca33 100644 --- a/qai_hub_models/models/yolov7/README.md +++ b/qai_hub_models/models/yolov7/README.md @@ -5,8 +5,7 @@ YoloV7 is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of Yolo-v7 found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov7). 
diff --git a/qai_hub_models/models/yolov7/evaluate.py b/qai_hub_models/models/yolov7/evaluate.py new file mode 100644 index 00000000..aafdf591 --- /dev/null +++ b/qai_hub_models/models/yolov7/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolov7 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 604376e8..f307a85e 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) 
+ - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 17129.0 - throughput: 58.38052425710783 + inference_time: 17203.0 + throughput: 58.12939603557519 estimated_peak_memory_range: - min: 40960 - max: 2751496 + min: 24576 + max: 10750504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgdxewzkp + job_id: jp8q318xp job_status: Passed torchscript_onnx_qnn: - inference_time: 10675.0 - throughput: 93.6768149882904 + inference_time: 10583.0 + throughput: 94.49116507606539 estimated_peak_memory_range: - min: 6303744 - max: 23356712 + min: 4993024 + max: 22190256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp0z3y625 + job_id: jpedry215 job_status: Passed torchscript_onnx: - inference_time: 12196.0 - throughput: 81.99409642505739 + inference_time: 12221.0 + throughput: 81.82636445462728 estimated_peak_memory_range: - min: 0 - max: 71814288 + min: 12288 + max: 13551016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jpede9y75 + job_id: j57yj9kv5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:02:24Z' + timestamp: '2024-11-09T21:50:33Z' - torchscript_onnx_tflite: - inference_time: 17213.0 - throughput: 58.095625399407425 + inference_time: 11527.0 + throughput: 86.75284115554784 estimated_peak_memory_range: - min: 643072 - max: 2897344 + min: 81920 + max: 104263200 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57y0z7q5 + job_id: jgkel8d2g job_status: Passed torchscript_onnx_qnn: - inference_time: 10581.0 - throughput: 94.50902561194594 + inference_time: 7099.0 + throughput: 140.8649105507818 estimated_peak_memory_range: - min: 6340608 - max: 23497240 + min: 4931584 + max: 78839440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp8q0o1zp + job_id: jgz3xnwk5 job_status: Passed torchscript_onnx: - inference_time: 12266.0 - throughput: 81.52616990053808 + inference_time: 7984.0 + throughput: 125.25050100200401 estimated_peak_memory_range: - min: 49152 - max: 14321104 + min: 6410240 + max: 130153072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jgz3oenz5 + job_id: jp4lx3m85 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:02:25Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:50:34Z' - torchscript_onnx_tflite: - inference_time: 11671.0 - throughput: 85.68246080027419 + inference_time: 12198.0 + throughput: 81.98065256599442 estimated_peak_memory_range: - min: 651264 - max: 104211616 + min: 614400 + max: 73267216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp4lkq9q5 + job_id: j5q67vw4p job_status: Passed torchscript_onnx_qnn: - inference_time: 7228.0 - throughput: 138.35085777531822 + inference_time: 7058.0 + throughput: 141.68319637291017 estimated_peak_memory_range: - min: 4931584 - max: 77809648 + min: 4927488 + max: 72845600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 
+176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgke7z8yg + job_id: j5wed4865 job_status: Passed torchscript_onnx: - inference_time: 8250.0 - throughput: 121.21212121212122 + inference_time: 8162.0 + throughput: 122.51899044351875 estimated_peak_memory_range: - min: 0 - max: 123137648 + min: 1953792 + max: 85297696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: j5we2o4z5 + job_id: jpxk7x335 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:02:26Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:50:35Z' - torchscript_onnx_tflite: - inference_time: 9902.0 - throughput: 100.98969905069683 + inference_time: 17066.0 + throughput: 58.59603890776984 estimated_peak_memory_range: - min: 614400 - max: 72967040 + min: 643072 + max: 3906568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpy1remrp + job_id: jglv0l785 job_status: Passed torchscript_onnx_qnn: - inference_time: 7059.0 - throughput: 141.66312508853946 + inference_time: 10342.0 + throughput: 96.69309611293754 estimated_peak_memory_range: - min: 4931584 - max: 72866800 + min: 4993024 + max: 6189168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgjvzxq7g - job_status: Passed - torchscript_onnx: - inference_time: 8140.0 - throughput: 122.85012285012284 - estimated_peak_memory_range: - min: 0 - max: 83739664 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 222 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 222 - job_id: jp14y06kp + 
job_id: jg9l3dklg job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:02:28Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:50:26Z' - torchscript_onnx_tflite: - inference_time: 17098.0 - throughput: 58.486372675166685 + inference_time: 17232.0 + throughput: 58.03156917363045 estimated_peak_memory_range: - min: 622592 - max: 2305200 + min: 28672 + max: 2402304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpxknvdj5 + job_id: j56y3wv0p job_status: Passed torchscript_onnx_qnn: - inference_time: 10188.0 - throughput: 98.15469179426776 + inference_time: 10441.0 + throughput: 95.77626664112633 estimated_peak_memory_range: - min: 4980736 - max: 6230336 + min: 5017600 + max: 6311256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5q6e8v7p + job_id: jgdxr28ep job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:02:16Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:50:28Z' - torchscript_onnx_tflite: - inference_time: 17227.0 - throughput: 58.048412375921515 + inference_time: 17182.0 + throughput: 58.200442323361656 estimated_peak_memory_range: - min: 692224 - max: 3142992 + min: 630784 + max: 2844152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57ylz295 + job_id: jp3j468lg job_status: Passed torchscript_onnx_qnn: - inference_time: 10469.0 - throughput: 95.52010698251982 + 
inference_time: 10337.0 + throughput: 96.73986649898423 estimated_peak_memory_range: - min: 4960256 - max: 6204416 + min: 5001216 + max: 6250920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5mn6re9p + job_id: j5wed4835 job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:02:04Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:50:29Z' - torchscript_onnx_tflite: - inference_time: 16938.0 - throughput: 59.0388475616956 + inference_time: 17184.0 + throughput: 58.19366852886406 estimated_peak_memory_range: min: 647168 - max: 3094104 + max: 2974080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp4ldqn15 + job_id: jgo218mxp job_status: Passed torchscript_onnx_qnn: - inference_time: 10472.0 - throughput: 95.49274255156608 + inference_time: 10343.0 + throughput: 96.68374746205163 estimated_peak_memory_range: - min: 4984832 - max: 6324384 + min: 4993024 + max: 6248752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgn6m20q5 + job_id: jg9l3dkwg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:02:05Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:50:30Z' - torchscript_onnx_tflite: - inference_time: 16971.0 - throughput: 58.92404690354134 + inference_time: 21595.0 + throughput: 46.3070155128502 estimated_peak_memory_range: - min: 663552 - max: 2863128 + min: 634880 + max: 76310032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpxk6v9l5 + job_id: jpv6174j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10464.0 - throughput: 95.565749235474 + inference_time: 13697.0 + throughput: 73.00868803387603 estimated_peak_memory_range: - min: 4960256 - max: 6419688 + min: 45056 + max: 5750768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jprv2k67g + job_id: jp14d678p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:02:06Z' + chipset: SA8295P + timestamp: '2024-11-09T21:50:31Z' - torchscript_onnx_tflite: - inference_time: 19509.0 - throughput: 51.258393561945766 + inference_time: 19511.0 + throughput: 51.253139254779356 estimated_peak_memory_range: - min: 0 - max: 108323152 + min: 663552 + max: 107755840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp2k08exp + job_id: jgjv0q1xg job_status: Passed torchscript_onnx_qnn: - inference_time: 12590.0 - throughput: 79.42811755361397 + inference_time: 12655.0 + throughput: 79.02015013828526 estimated_peak_memory_range: min: 4931584 - max: 63471088 + max: 65351792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpv603775 + job_id: jgdxr28rp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:02:22Z' + timestamp: '2024-11-09T21:50:32Z' - torchscript_onnx_qnn: - inference_time: 10951.0 - throughput: 91.31586156515387 + inference_time: 10935.0 + throughput: 91.44947416552355 
estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jglv6nle5 + job_id: jp14d672p job_status: Passed torchscript_onnx: - inference_time: 14164.0 - throughput: 70.60152499293984 + inference_time: 14100.0 + throughput: 70.92198581560284 estimated_peak_memory_range: - min: 9768960 - max: 9768960 + min: 9736192 + max: 9736192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jg9ljvdqg + job_id: j5mnw8odp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:02:27Z' + timestamp: '2024-11-09T21:50:37Z' diff --git a/qai_hub_models/models/yolov7/requirements.txt b/qai_hub_models/models/yolov7/requirements.txt index b7a4117f..f87ce596 100644 --- a/qai_hub_models/models/yolov7/requirements.txt +++ b/qai_hub_models/models/yolov7/requirements.txt @@ -1,3 +1,4 @@ object-detection-metrics==0.4.post1 seaborn==0.11.0 shapely==2.0.3 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolov7_quantized/README.md b/qai_hub_models/models/yolov7_quantized/README.md index a271f87c..a75a4667 100644 --- a/qai_hub_models/models/yolov7_quantized/README.md +++ b/qai_hub_models/models/yolov7_quantized/README.md @@ -5,8 +5,7 @@ YoloV7 is a machine learning model that predicts bounding boxes and classes of objects in an image. This model is post-training quantized to int8 using samples from the COCO dataset. -This is based on the implementation of Yolo-v7-Quantized found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov7_quantized). diff --git a/qai_hub_models/models/yolov7_quantized/evaluate.py b/qai_hub_models/models/yolov7_quantized/evaluate.py new file mode 100644 index 00000000..e9544d49 --- /dev/null +++ b/qai_hub_models/models/yolov7_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolov7_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + 
torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index d79ac50b..76b0e28c 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -49,64 +49,11 @@ models: - name: Yolo-v7-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4411.0 - throughput: 226.70596236681024 + inference_time: 4408.0 + throughput: 226.86025408348456 estimated_peak_memory_range: - min: 167936 - max: 2441376 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 229 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 229 - job_id: jpede9885 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 4820.0 - throughput: 207.4688796680498 - estimated_peak_memory_range: - min: 16384 - max: 10814640 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 221 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 221 - job_id: jpxknv3j5 - job_status: Passed - torchscript_onnx: - inference_time: 7433.0 - throughput: 134.53518094981837 - estimated_peak_memory_range: - min: 20480 - max: 9979096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 253 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 253 - job_id: jp3jvkwxg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:01:41Z' - - torchscript_onnx_tflite: - inference_time: 4388.0 - throughput: 227.89425706472196 - estimated_peak_memory_range: - min: 0 - max: 2732392 + min: 184320 + max: 16200776 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +61,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 229 - job_id: jgz3oe845 + job_id: jpxk7xe15 job_status: Passed torchscript_onnx_qnn: - inference_time: 4837.0 - throughput: 206.7397146991937 + inference_time: 4823.0 + throughput: 207.33982998133942 estimated_peak_memory_range: - min: 1269760 - max: 11107544 + min: 12288 + max: 11354840 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,14 +76,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5mnqroyp + job_id: j56y3wq0p job_status: Passed torchscript_onnx: - inference_time: 7414.0 - throughput: 134.87995683841382 + inference_time: 7453.0 + throughput: 134.1741580571582 estimated_peak_memory_range: - min: 36864 - max: 28617032 + min: 12288 + max: 11448240 primary_compute_unit: NPU precision: int8 layer_info: @@ -144,7 +91,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 253 - job_id: jgo2ky44p + job_id: j57yj96l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:01:42Z' + timestamp: '2024-11-09T21:49:50Z' - torchscript_onnx_tflite: - inference_time: 2838.0 - throughput: 352.36081747709653 + inference_time: 2814.0 + throughput: 355.36602700781805 estimated_peak_memory_range: min: 12288 - max: 76351456 + max: 77221424 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j5we2o845 + job_id: j5mnw8vwp job_status: Passed torchscript_onnx_qnn: - inference_time: 3873.0 - throughput: 258.1977794990963 + inference_time: 3178.0 + throughput: 314.6633102580239 estimated_peak_memory_range: min: 1245184 - max: 59607968 + max: 57670544 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,14 +129,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgn6l27v5 + job_id: jp3j46qlg job_status: Passed torchscript_onnx: - inference_time: 
5442.0 - throughput: 183.75597206909225 + inference_time: 5413.0 + throughput: 184.74043968224643 estimated_peak_memory_range: - min: 307200 - max: 127215040 + min: 1589248 + max: 128279648 primary_compute_unit: NPU precision: int8 layer_info: @@ -197,7 +144,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 253 - job_id: jpv603975 + job_id: jp4lx38v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +153,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:01:43Z' + timestamp: '2024-11-09T21:49:51Z' - torchscript_onnx_tflite: - inference_time: 2403.0 - throughput: 416.1464835622139 + inference_time: 2888.0 + throughput: 346.26038781163436 estimated_peak_memory_range: - min: 8192 - max: 54490320 + min: 159744 + max: 54886752 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j57y0zkq5 + job_id: jgn69krr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3343.0 - throughput: 299.1325157044571 + inference_time: 3348.0 + throughput: 298.6857825567503 estimated_peak_memory_range: min: 1241088 - max: 51850864 + max: 51469184 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,14 +182,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jglv6n1e5 + job_id: jgo218exp job_status: Passed torchscript_onnx: - inference_time: 5201.0 - throughput: 192.27071716977505 + inference_time: 5178.0 + throughput: 193.12475859405177 estimated_peak_memory_range: - min: 1552384 - max: 93578320 + min: 81920 + max: 93343248 primary_compute_unit: NPU precision: int8 layer_info: @@ -250,7 +197,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 253 - job_id: jpede9l75 + job_id: jpxk7xm15 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - 
timestamp: '2024-10-26T23:01:45Z' + timestamp: '2024-11-09T21:49:52Z' - torchscript_onnx_tflite: - inference_time: 9884.0 - throughput: 101.17361392148928 + inference_time: 9984.0 + throughput: 100.16025641025641 estimated_peak_memory_range: min: 159744 - max: 74278112 + max: 73593984 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jg9ljvkmg + job_id: jprv4w19g job_status: Passed torchscript_onnx_qnn: - inference_time: 13180.0 - throughput: 75.87253414264036 + inference_time: 13086.0 + throughput: 76.41754546843956 estimated_peak_memory_range: - min: 1245184 - max: 8969664 + min: 1277952 + max: 8839728 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jprv8knvg + job_id: jpv617zj5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-26T23:01:31Z' + timestamp: '2024-11-09T21:49:42Z' - torchscript_onnx_tflite: - inference_time: 93788.0 - throughput: 10.662344862882245 + inference_time: 94686.0 + throughput: 10.56122341212006 estimated_peak_memory_range: - min: 3915776 - max: 38732672 + min: 3989504 + max: 8049032 primary_compute_unit: GPU precision: int8 layer_info: @@ -311,7 +258,7 @@ models: layers_on_gpu: 127 layers_on_cpu: 69 total_layers: 229 - job_id: jp14y07np + job_id: jp2k7e34p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T23:01:20Z' + timestamp: '2024-11-09T21:49:31Z' - torchscript_onnx_tflite: - inference_time: 4378.0 - throughput: 228.41480127912288 + inference_time: 4356.0 + throughput: 229.5684113865932 estimated_peak_memory_range: - min: 180224 - max: 1823568 + min: 188416 + max: 1707216 primary_compute_unit: 
NPU precision: int8 layer_info: @@ -334,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgdxew86p + job_id: jpy14mv7p job_status: Passed torchscript_onnx_qnn: - inference_time: 3753.0 - throughput: 266.4535038635758 + inference_time: 3739.0 + throughput: 267.4511901577962 estimated_peak_memory_range: min: 1265664 - max: 2481824 + max: 2441624 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp2k08vxp + job_id: jgjv0qkxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:01:33Z' + timestamp: '2024-11-09T21:49:43Z' - torchscript_onnx_tflite: - inference_time: 4420.0 - throughput: 226.2443438914027 + inference_time: 4376.0 + throughput: 228.51919561243145 estimated_peak_memory_range: min: 184320 - max: 3173264 + max: 3654240 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jglv2n4m5 + job_id: jp0z16e65 job_status: Passed torchscript_onnx_qnn: - inference_time: 3736.0 - throughput: 267.6659528907923 + inference_time: 3771.0 + throughput: 265.1816494298595 estimated_peak_memory_range: min: 1265664 - max: 2424256 + max: 2712080 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpv6r3qr5 + job_id: jgz3xnvk5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:01:42Z' + timestamp: '2024-11-09T21:49:45Z' - torchscript_onnx_tflite: - inference_time: 4377.0 - throughput: 228.4669865204478 + inference_time: 4381.0 + throughput: 228.2583884957772 estimated_peak_memory_range: - min: 167936 - max: 
149657336 + min: 180224 + max: 150857944 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j56yz62yp + job_id: jp8q31wxp job_status: Passed torchscript_onnx_qnn: - inference_time: 3754.0 - throughput: 266.3825253063399 + inference_time: 3780.0 + throughput: 264.55026455026456 estimated_peak_memory_range: - min: 1265664 - max: 2606752 + min: 1282048 + max: 2808040 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgjv2xdeg + job_id: j5wed4x65 job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:01:43Z' + timestamp: '2024-11-09T21:49:46Z' - torchscript_onnx_tflite: - inference_time: 4357.0 - throughput: 229.51572182694514 + inference_time: 4398.0 + throughput: 227.37608003638016 estimated_peak_memory_range: - min: 167936 - max: 1951480 + min: 16384 + max: 1927136 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgo2nyzkp + job_id: jgkel8r2g job_status: Passed torchscript_onnx_qnn: - inference_time: 3781.0 - throughput: 264.48029621793177 + inference_time: 3761.0 + throughput: 265.8867322520606 estimated_peak_memory_range: - min: 1294336 - max: 2510432 + min: 1306624 + max: 2557544 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgz3je2x5 + job_id: jg9l3d8lg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-10-30T01:01:45Z' + timestamp: '2024-11-09T21:49:47Z' - torchscript_onnx_tflite: - inference_time: 6051.0 - throughput: 
165.26194017517767 + inference_time: 6058.0 + throughput: 165.0709805216243 estimated_peak_memory_range: min: 184320 - max: 55393360 + max: 55379888 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jp3j1knng + job_id: j5q67v94p job_status: Passed torchscript_onnx_qnn: - inference_time: 5351.0 - throughput: 186.88095683049897 + inference_time: 5157.0 + throughput: 193.91118867558657 estimated_peak_memory_range: min: 1282048 - max: 6951072 + max: 6951776 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpedw9ov5 + job_id: jp14d632p job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +457,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:01:44Z' + timestamp: '2024-11-09T21:49:48Z' - torchscript_onnx_tflite: - inference_time: 4964.0 - throughput: 201.45044319097502 + inference_time: 5031.0 + throughput: 198.76764062810574 estimated_peak_memory_range: - min: 204800 - max: 80730384 + min: 163840 + max: 81985776 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +471,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgdxew8kp + job_id: jglv0le85 job_status: Passed torchscript_onnx_qnn: - inference_time: 4617.0 - throughput: 216.59085986571367 + inference_time: 4638.0 + throughput: 215.610176800345 estimated_peak_memory_range: min: 1245184 - max: 62645568 + max: 61084848 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5q6e8m7p + job_id: jgdxr20ep job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,10 +495,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T23:01:38Z' + timestamp: '2024-11-09T21:49:49Z' - 
torchscript_onnx_qnn: - inference_time: 4137.0 - throughput: 241.72105390379502 + inference_time: 4153.0 + throughput: 240.78979051288226 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -562,14 +509,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpy1re7rp + job_id: jpedry415 job_status: Passed torchscript_onnx: inference_time: 9170.0 throughput: 109.05125408942203 estimated_peak_memory_range: - min: 7090176 - max: 7090176 + min: 6918144 + max: 6918144 primary_compute_unit: NPU precision: int8 layer_info: @@ -577,7 +524,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 253 - job_id: jgjvzxw7g + job_id: j5mnw84wp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:01:44Z' + timestamp: '2024-11-09T21:49:54Z' diff --git a/qai_hub_models/models/yolov7_quantized/requirements.txt b/qai_hub_models/models/yolov7_quantized/requirements.txt index 2b9fad06..707f8b70 100644 --- a/qai_hub_models/models/yolov7_quantized/requirements.txt +++ b/qai_hub_models/models/yolov7_quantized/requirements.txt @@ -2,3 +2,4 @@ aimet-torch==1.32.1.post1; sys_platform == "linux" and python_version == "3.10" object-detection-metrics==0.4.post1 seaborn==0.11.0 shapely==2.0.3 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolov8_det/README.md b/qai_hub_models/models/yolov8_det/README.md index 141d16c8..48707437 100644 --- a/qai_hub_models/models/yolov8_det/README.md +++ b/qai_hub_models/models/yolov8_det/README.md @@ -5,8 +5,7 @@ Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes and classes of objects in an image. -This is based on the implementation of YOLOv8-Detection found -[here]({source_repo}). This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. 
More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov8_det). diff --git a/qai_hub_models/models/yolov8_det/evaluate.py b/qai_hub_models/models/yolov8_det/evaluate.py new file mode 100644 index 00000000..dfc97ca8 --- /dev/null +++ b/qai_hub_models/models/yolov8_det/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolov8_det import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 54c22799..e6520d4a 100644 --- 
a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5232.0 - throughput: 191.131498470948 + inference_time: 5164.0 + throughput: 193.64833462432222 estimated_peak_memory_range: - min: 73728 - max: 2686568 + min: 28672 + max: 4538296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp3jvk8mg + job_id: jp14d6d2p job_status: Passed torchscript_onnx_qnn: - inference_time: 5279.0 - throughput: 189.42981625307823 + inference_time: 5216.0 + throughput: 191.71779141104295 estimated_peak_memory_range: - min: 6324224 - max: 22830848 + min: 4935680 + max: 20680576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jgdxew06p + job_id: jpy14m47p job_status: Passed torchscript_onnx: - inference_time: 6034.0 - throughput: 165.72754391779912 + inference_time: 6065.0 + throughput: 164.88046166529267 estimated_peak_memory_range: - min: 4931584 - max: 11476680 + min: 4956160 + max: 9960016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp8q0o6qp + job_id: jgjv0q0xg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:00:44Z' + timestamp: '2024-11-09T21:48:54Z' - torchscript_onnx_tflite: - inference_time: 5248.0 - throughput: 190.5487804878049 + 
inference_time: 3722.0 + throughput: 268.6727565824825 estimated_peak_memory_range: - min: 237568 - max: 3018240 + min: 12288 + max: 98088688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgo2kym1p + job_id: jgdxr2rep job_status: Passed torchscript_onnx_qnn: - inference_time: 5263.0 - throughput: 190.00570017100512 + inference_time: 3708.0 + throughput: 269.6871628910464 estimated_peak_memory_range: - min: 6320128 - max: 22307104 + min: 4931584 + max: 58761456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j57y0z6n5 + job_id: jp0z16165 job_status: Passed torchscript_onnx: - inference_time: 6012.0 - throughput: 166.333998669328 + inference_time: 4087.0 + throughput: 244.67824810374358 estimated_peak_memory_range: - min: 4956160 - max: 10565384 + min: 5341184 + max: 122665792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgke7zovg + job_id: jpedryr15 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T23:00:45Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:48:55Z' - torchscript_onnx_tflite: - inference_time: 3842.0 - throughput: 260.28110359187923 + inference_time: 3674.0 + throughput: 272.18290691344583 estimated_peak_memory_range: - min: 12288 - max: 96222704 + min: 212992 + max: 61725456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jpv6034z5 + job_id: j57yj9jl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4466.0 - throughput: 223.91401701746528 + inference_time: 
3606.0 + throughput: 277.31558513588465 estimated_peak_memory_range: - min: 4931584 - max: 54459792 + min: 4927488 + max: 50252016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp4lkq825 + job_id: jp8q313xp job_status: Passed torchscript_onnx: - inference_time: 4342.0 - throughput: 230.30861354214647 + inference_time: 3977.0 + throughput: 251.44581342720645 estimated_peak_memory_range: - min: 5357568 - max: 119671840 + min: 5345280 + max: 75622112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j5q6e8zep + job_id: jgz3xnxk5 job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T23:00:46Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:48:56Z' - torchscript_onnx_tflite: - inference_time: 3693.0 - throughput: 270.7825616030328 + inference_time: 5146.0 + throughput: 194.32568985619898 estimated_peak_memory_range: - min: 8192 - max: 61375840 + min: 4927488 + max: 6550968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp14y03np + job_id: jp4lx3xv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3153.0 - throughput: 317.1582619727244 + inference_time: 4992.0 + throughput: 200.32051282051282 estimated_peak_memory_range: - min: 0 - max: 45630528 + min: 5009408 + max: 6143184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp0z3yd05 - job_status: Passed - torchscript_onnx: - inference_time: 3978.0 - throughput: 251.38260432378078 - estimated_peak_memory_range: 
- min: 5365760 - max: 75379984 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 286 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 286 - job_id: j56ye6rnp + job_id: jgkel8l2g job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T23:00:48Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:48:47Z' - torchscript_onnx_tflite: - inference_time: 5160.0 - throughput: 193.7984496124031 + inference_time: 5177.0 + throughput: 193.16206297083252 estimated_peak_memory_range: - min: 258048 - max: 1999424 + min: 20480 + max: 4748072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgjvzx11g + job_id: jpxk7x715 job_status: Passed torchscript_onnx_qnn: - inference_time: 5002.0 - throughput: 199.9200319872051 + inference_time: 5011.0 + throughput: 199.56096587507483 estimated_peak_memory_range: - min: 4964352 - max: 6152264 + min: 6217728 + max: 7671344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jpxknvm85 + job_id: jglv0l085 job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T23:00:36Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:48:49Z' - torchscript_onnx_tflite: - inference_time: 5251.0 - throughput: 190.43991620643686 + inference_time: 5171.0 + throughput: 193.38619222587508 estimated_peak_memory_range: - min: 53248 - max: 2595720 + min: 221184 + max: 16602896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jpxk6vkl5 + job_id: j5mnw8wwp job_status: Passed torchscript_onnx_qnn: - inference_time: 5039.0 - throughput: 198.45207382417146 + inference_time: 5034.0 + throughput: 198.64918553833928 estimated_peak_memory_range: - min: 4952064 - max: 6651984 + min: 5001216 + max: 6376144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jprv2kv7g + job_id: j56y3w30p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:01:10Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:48:50Z' - torchscript_onnx_tflite: - inference_time: 5204.0 - throughput: 192.1598770176787 + inference_time: 5191.0 + throughput: 192.64110961279138 estimated_peak_memory_range: - min: 2117632 - max: 4703464 + min: 274432 + max: 2042712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j5mn6rn9p + job_id: jgn69k9r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5056.0 - throughput: 197.78481012658227 + inference_time: 5003.0 + throughput: 199.8800719568259 estimated_peak_memory_range: - min: 5001216 - max: 6550704 + min: 4956160 + max: 6653952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp2k98kqp + job_id: jp3j464lg job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:01:11Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:48:51Z' - torchscript_onnx_tflite: - inference_time: 5163.0 - throughput: 193.6858415649816 + inference_time: 
9914.0 + throughput: 100.86746015735324 estimated_peak_memory_range: - min: 258048 - max: 2692928 + min: 233472 + max: 53991872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgn6m26q5 + job_id: jprv4w49g job_status: Passed torchscript_onnx_qnn: - inference_time: 5008.0 - throughput: 199.68051118210863 + inference_time: 8548.0 + throughput: 116.98642957416939 estimated_peak_memory_range: - min: 4960256 - max: 6248976 + min: 53248 + max: 5646640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp0z2yzn5 + job_id: jgo2181xp job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:01:12Z' + chipset: SA8295P + timestamp: '2024-11-09T21:48:52Z' - torchscript_onnx_tflite: - inference_time: 8735.0 - throughput: 114.48196908986834 + inference_time: 8700.0 + throughput: 114.94252873563218 estimated_peak_memory_range: - min: 237568 - max: 88281216 + min: 258048 + max: 86803520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jg9ljv8mg + job_id: jp2k7e74p job_status: Passed torchscript_onnx_qnn: - inference_time: 7870.0 - throughput: 127.06480304955528 + inference_time: 7915.0 + throughput: 126.34238787113077 estimated_peak_memory_range: - min: 4935680 - max: 41021840 + min: 4919296 + max: 40582320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jpy1ren0p + job_id: jpv6171j5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - 
timestamp: '2024-10-26T23:00:42Z' + timestamp: '2024-11-09T21:48:54Z' - torchscript_onnx_qnn: - inference_time: 5534.0 - throughput: 180.70112034694614 + inference_time: 5532.0 + throughput: 180.76644974692698 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j5mnqr47p + job_id: j5q67v74p job_status: Passed torchscript_onnx: - inference_time: 6755.0 - throughput: 148.03849000740192 + inference_time: 6700.0 + throughput: 149.2537313432836 estimated_peak_memory_range: - min: 4931584 - max: 4931584 + min: 5160960 + max: 5160960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jglv6no25 + job_id: j5wed4m65 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T23:00:47Z' + timestamp: '2024-11-09T21:48:58Z' diff --git a/qai_hub_models/models/yolov8_det/requirements.txt b/qai_hub_models/models/yolov8_det/requirements.txt index 158fe9bd..cd883e1a 100644 --- a/qai_hub_models/models/yolov8_det/requirements.txt +++ b/qai_hub_models/models/yolov8_det/requirements.txt @@ -3,3 +3,4 @@ seaborn==0.11.0 thop==0.1.1.post2209072238 ultralytics==8.0.193 shapely==2.0.3 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolov8_det_quantized/README.md b/qai_hub_models/models/yolov8_det_quantized/README.md index 81e68ba0..8066e76a 100644 --- a/qai_hub_models/models/yolov8_det_quantized/README.md +++ b/qai_hub_models/models/yolov8_det_quantized/README.md @@ -5,8 +5,7 @@ Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes and classes of objects in an image. This model is post-training quantized to int8 using samples from the COCO dataset. -This is based on the implementation of YOLOv8-Detection-Quantized found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov8_det_quantized). diff --git a/qai_hub_models/models/yolov8_det_quantized/evaluate.py b/qai_hub_models/models/yolov8_det_quantized/evaluate.py new file mode 100644 index 00000000..31581e2a --- /dev/null +++ b/qai_hub_models/models/yolov8_det_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolov8_det_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, 
BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index 656f7fac..6bdcb35f 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -49,64 +49,11 @@ models: - name: YOLOv8-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1907.0 - throughput: 524.3838489774515 - estimated_peak_memory_range: - min: 24576 - max: 108511120 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 278 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 278 - job_id: jgz3rxjk5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2260.0 - throughput: 442.4778761061947 + inference_time: 1912.0 + throughput: 523.0125523012553 estimated_peak_memory_range: min: 12288 - max: 9692360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 273 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 273 - job_id: j5weqdq65 - job_status: Passed - torchscript_onnx: - inference_time: 5636.0 - throughput: 177.43080198722498 - estimated_peak_memory_range: - min: 4808704 - max: 7993256 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 323 - layers_on_gpu: 0 - layers_on_cpu: 8 - total_layers: 331 - job_id: jp0zl1l65 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:37:19Z' - - torchscript_onnx_tflite: - 
inference_time: 1909.0 - throughput: 523.8344683080147 - estimated_peak_memory_range: - min: 12288 - max: 1865400 + max: 95651088 primary_compute_unit: NPU precision: int8 layer_info: @@ -114,14 +61,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jp2k0876p + job_id: j5wed4qj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1957.0 - throughput: 510.98620337250895 + inference_time: 2249.0 + throughput: 444.642063139173 estimated_peak_memory_range: - min: 1265664 - max: 2571464 + min: 1249280 + max: 8928416 primary_compute_unit: NPU precision: int8 layer_info: @@ -129,14 +76,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jg9lw3wlg + job_id: j5mnw8zwp job_status: Passed torchscript_onnx: - inference_time: 5611.0 - throughput: 178.22135091783997 + inference_time: 5532.0 + throughput: 180.76644974692698 estimated_peak_memory_range: - min: 4317184 - max: 7877176 + min: 3387392 + max: 10618592 primary_compute_unit: NPU precision: int8 layer_info: @@ -144,7 +91,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 331 - job_id: jp2k0836p + job_id: jp3j46mlg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -153,13 +100,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-31T14:37:09Z' + timestamp: '2024-11-09T21:48:09Z' - torchscript_onnx_tflite: - inference_time: 1281.0 - throughput: 780.64012490242 + inference_time: 1275.0 + throughput: 784.3137254901961 estimated_peak_memory_range: min: 12288 - max: 59234048 + max: 59842048 primary_compute_unit: NPU precision: int8 layer_info: @@ -167,14 +114,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jpy1re40p + job_id: jg9l3dwvg job_status: Passed torchscript_onnx_qnn: - inference_time: 1383.0 - throughput: 723.0657989877079 + inference_time: 1495.0 + throughput: 668.8963210702341 estimated_peak_memory_range: - min: 1249280 - max: 9758192 + min: 0 + max: 
29599360 primary_compute_unit: NPU precision: int8 layer_info: @@ -182,14 +129,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jp14ede2p + job_id: jgn69ker5 job_status: Passed torchscript_onnx: - inference_time: 4080.0 - throughput: 245.09803921568627 + inference_time: 4081.0 + throughput: 245.0379808870375 estimated_peak_memory_range: - min: 6492160 - max: 162398368 + min: 7376896 + max: 166956272 primary_compute_unit: NPU precision: int8 layer_info: @@ -197,7 +144,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 331 - job_id: jpy1rev0p + job_id: jgo218vxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -206,13 +153,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-31T14:37:10Z' + timestamp: '2024-11-09T21:48:10Z' - torchscript_onnx_tflite: - inference_time: 1014.0 - throughput: 986.1932938856016 + inference_time: 1007.0 + throughput: 993.0486593843099 estimated_peak_memory_range: min: 8192 - max: 39373984 + max: 39477760 primary_compute_unit: NPU precision: int8 layer_info: @@ -220,14 +167,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jgo2kye1p + job_id: jp14d6elp job_status: Passed torchscript_onnx_qnn: inference_time: 1421.0 throughput: 703.7297677691766 estimated_peak_memory_range: min: 0 - max: 26294064 + max: 25800192 primary_compute_unit: NPU precision: int8 layer_info: @@ -235,14 +182,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jgdxoroep + job_id: jprv4wy9g job_status: Passed torchscript_onnx: - inference_time: 3989.0 - throughput: 250.68939583855604 + inference_time: 3975.0 + throughput: 251.57232704402514 estimated_peak_memory_range: - min: 6123520 - max: 127381456 + min: 3239936 + max: 124542464 primary_compute_unit: NPU precision: int8 layer_info: @@ -250,7 +197,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 331 - job_id: jp8q0o8qp + job_id: jpv617wj5 job_status: 
Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -259,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-10-31T14:37:11Z' + timestamp: '2024-11-09T21:48:11Z' - torchscript_onnx_tflite: - inference_time: 4534.0 - throughput: 220.55580061755623 + inference_time: 4440.0 + throughput: 225.22522522522522 estimated_peak_memory_range: min: 61440 - max: 44229744 + max: 43852304 primary_compute_unit: NPU precision: int8 layer_info: @@ -273,14 +220,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jp0z3ye05 + job_id: jgdxr2olp job_status: Passed torchscript_onnx_qnn: - inference_time: 5856.0 - throughput: 170.76502732240436 + inference_time: 5809.0 + throughput: 172.14666896195558 estimated_peak_memory_range: - min: 1290240 - max: 9528032 + min: 1294336 + max: 9542048 primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +235,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: j57yxjxl5 + job_id: jp2k7em4p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -297,13 +244,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-10-31T14:37:12Z' + timestamp: '2024-11-09T21:48:00Z' - torchscript_onnx_tflite: - inference_time: 46524.0 - throughput: 21.494282520849453 + inference_time: 44972.0 + throughput: 22.236057991639242 estimated_peak_memory_range: - min: 1880064 - max: 16882560 + min: 2957312 + max: 11589384 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +258,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 0 total_layers: 278 - job_id: jp8q0owqp + job_id: j5wed4965 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -320,13 +267,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-10-26T22:59:35Z' + timestamp: '2024-11-09T21:47:50Z' - torchscript_onnx_tflite: - inference_time: 1895.0 - throughput: 527.7044854881267 + 
inference_time: 1890.0 + throughput: 529.1005291005291 estimated_peak_memory_range: min: 12288 - max: 1591392 + max: 1479680 primary_compute_unit: NPU precision: int8 layer_info: @@ -334,14 +281,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jgke7zrvg + job_id: jg9l3d4lg job_status: Passed torchscript_onnx_qnn: - inference_time: 1956.0 - throughput: 511.2474437627812 + inference_time: 1950.0 + throughput: 512.8205128205128 estimated_peak_memory_range: - min: 1298432 - max: 2596904 + min: 1261568 + max: 2424264 primary_compute_unit: NPU precision: int8 layer_info: @@ -349,7 +296,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jp4lvxvv5 + job_id: jpy14md7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -358,13 +305,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-10-31T14:37:13Z' + timestamp: '2024-11-09T21:48:01Z' - torchscript_onnx_tflite: - inference_time: 1907.0 - throughput: 524.3838489774515 + inference_time: 1900.0 + throughput: 526.3157894736842 estimated_peak_memory_range: min: 16384 - max: 2436728 + max: 2597488 primary_compute_unit: NPU precision: int8 layer_info: @@ -372,14 +319,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jgz3je3o5 + job_id: jp14d682p job_status: Passed torchscript_onnx_qnn: - inference_time: 1958.0 - throughput: 510.7252298263534 + inference_time: 1967.0 + throughput: 508.38840874428064 estimated_peak_memory_range: - min: 1286144 - max: 2598912 + min: 1277952 + max: 2810312 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +334,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jgdxqwxrp + job_id: jp8q317xp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -396,13 +343,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-10-30T01:00:44Z' + timestamp: '2024-11-09T21:48:03Z' - 
torchscript_onnx_tflite: - inference_time: 1907.0 - throughput: 524.3838489774515 + inference_time: 1911.0 + throughput: 523.2862375719518 estimated_peak_memory_range: min: 12288 - max: 8191904 + max: 8396640 primary_compute_unit: NPU precision: int8 layer_info: @@ -410,14 +357,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: j5we3oe35 + job_id: jgdxr2vep job_status: Passed torchscript_onnx_qnn: - inference_time: 1948.0 - throughput: 513.347022587269 + inference_time: 1950.0 + throughput: 512.8205128205128 estimated_peak_memory_range: - min: 1265664 - max: 2609232 + min: 1257472 + max: 2586736 primary_compute_unit: NPU precision: int8 layer_info: @@ -425,7 +372,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: j5we3oem5 + job_id: jgkel8y2g job_status: Passed reference_device_info: name: SA8775 (Proxy) @@ -434,13 +381,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P Proxy - timestamp: '2024-10-30T01:00:45Z' + timestamp: '2024-11-09T21:48:04Z' - torchscript_onnx_tflite: - inference_time: 1901.0 - throughput: 526.0389268805892 + inference_time: 1907.0 + throughput: 524.3838489774515 estimated_peak_memory_range: min: 12288 - max: 8235376 + max: 33658688 primary_compute_unit: NPU precision: int8 layer_info: @@ -448,14 +395,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jp14w048p + job_id: j57yj9dl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1968.0 - throughput: 508.130081300813 + inference_time: 1952.0 + throughput: 512.2950819672132 estimated_peak_memory_range: min: 1269760 - max: 2683784 + max: 2587512 primary_compute_unit: NPU precision: int8 layer_info: @@ -463,7 +410,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jp14w047p + job_id: j5q67v24p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -472,13 +419,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: 
'2024-10-30T01:00:47Z' + timestamp: '2024-11-09T21:48:06Z' - torchscript_onnx_tflite: - inference_time: 2781.0 - throughput: 359.5828838547285 + inference_time: 2828.0 + throughput: 353.6067892503536 estimated_peak_memory_range: - min: 90112 - max: 38406064 + min: 106496 + max: 38965808 primary_compute_unit: NPU precision: int8 layer_info: @@ -486,14 +433,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jg9lyvlwg + job_id: jp4lx3wv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3234.0 - throughput: 309.2145949288806 + inference_time: 3069.0 + throughput: 325.8390355164549 estimated_peak_memory_range: min: 1286144 - max: 6919104 + max: 6909088 primary_compute_unit: NPU precision: int8 layer_info: @@ -501,7 +448,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jg9lyvl8g + job_id: jglv0lk85 job_status: Passed reference_device_info: name: SA8295P ADP @@ -510,13 +457,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-10-30T01:00:46Z' + timestamp: '2024-11-09T21:48:07Z' - torchscript_onnx_tflite: - inference_time: 2118.0 - throughput: 472.14353163361665 + inference_time: 2120.0 + throughput: 471.6981132075472 estimated_peak_memory_range: - min: 12288 - max: 60752560 + min: 16384 + max: 60053920 primary_compute_unit: NPU precision: int8 layer_info: @@ -524,14 +471,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 278 - job_id: jp3jvkqmg + job_id: jpxk7x115 job_status: Passed torchscript_onnx_qnn: - inference_time: 2410.0 - throughput: 414.9377593360996 + inference_time: 2502.0 + throughput: 399.68025579536373 estimated_peak_memory_range: - min: 1249280 - max: 34814704 + min: 1261568 + max: 31100224 primary_compute_unit: NPU precision: int8 layer_info: @@ -539,7 +486,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jpy16467p + job_id: j56y3w10p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -548,10 +495,10 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-31T14:37:18Z' + timestamp: '2024-11-09T21:48:08Z' - torchscript_onnx_qnn: - inference_time: 2269.0 - throughput: 440.72278536800354 + inference_time: 2279.0 + throughput: 438.7889425186485 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -562,14 +509,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jpxky7y15 + job_id: jp0z16r65 job_status: Passed torchscript_onnx: - inference_time: 6372.0 - throughput: 156.9365976145637 + inference_time: 6307.0 + throughput: 158.55398763278896 estimated_peak_memory_range: - min: 7868416 - max: 7868416 + min: 7835648 + max: 7835648 primary_compute_unit: NPU precision: int8 layer_info: @@ -577,7 +524,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 331 - job_id: jp0z3yk05 + job_id: jgjv0qlxg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -586,4 +533,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-31T14:37:13Z' + timestamp: '2024-11-09T21:48:12Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/requirements.txt b/qai_hub_models/models/yolov8_det_quantized/requirements.txt index 26a20535..8bcc305e 100644 --- a/qai_hub_models/models/yolov8_det_quantized/requirements.txt +++ b/qai_hub_models/models/yolov8_det_quantized/requirements.txt @@ -4,3 +4,4 @@ seaborn==0.11.0 thop==0.1.1.post2209072238 ultralytics==8.0.193 shapely==2.0.3 +fiftyone==1.0.1 diff --git a/qai_hub_models/models/yolov8_seg/README.md b/qai_hub_models/models/yolov8_seg/README.md index 678d1662..7ea43e98 100644 --- a/qai_hub_models/models/yolov8_seg/README.md +++ b/qai_hub_models/models/yolov8_seg/README.md @@ -5,8 +5,7 @@ Ultralytics YOLOv8 is a machine learning model that predicts bounding boxes, segmentation masks and classes of objects in an image. -This is based on the implementation of YOLOv8-Segmentation found -[here]({source_repo}). 
This repository contains scripts for optimized on-device +{source_repo_details}This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov8_seg). diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index f844d089..c7cd9c00 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -22,6 +22,7 @@ aggregated: - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - SA8255 (Proxy) + - SA8295P ADP - SA8650 (Proxy) - SA8775 (Proxy) supported_chipsets: @@ -35,17 +36,18 @@ aggregated: - QCS8450 Proxy - QCS8550 Proxy - SA8255P Proxy + - SA8295P - SA8650P Proxy - SA8775P Proxy models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 6462.0 - throughput: 154.75085112968122 + inference_time: 6580.0 + throughput: 151.9756838905775 estimated_peak_memory_range: - min: 4595712 - max: 14798792 + min: 4571136 + max: 6842792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -53,14 +55,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: j57y0zdn5 + job_id: jpv617rm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6426.0 - throughput: 155.6178026766262 + inference_time: 6435.0 + throughput: 155.4001554001554 estimated_peak_memory_range: - min: 6303744 - max: 17326464 + min: 4939776 + max: 19905024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -68,14 +70,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jp8q0o3qp + job_id: jp4lx3vl5 job_status: Passed torchscript_onnx: - inference_time: 7662.0 - throughput: 130.5142260506395 + inference_time: 7637.0 + throughput: 130.941469163284 estimated_peak_memory_range: - min: 12283904 - max: 20248776 + min: 12255232 + max: 21217792 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-83,7 +85,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jgz3oex45 + job_id: j5q67v3mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -92,13 +94,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T22:58:50Z' + timestamp: '2024-11-09T21:47:02Z' - torchscript_onnx_tflite: - inference_time: 6525.0 - throughput: 153.25670498084293 + inference_time: 4674.0 + throughput: 213.94950791613178 estimated_peak_memory_range: - min: 6316032 - max: 9146752 + min: 4526080 + max: 123689040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -106,14 +108,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jp4lkqw25 + job_id: jgjv0q28g job_status: Passed torchscript_onnx_qnn: - inference_time: 6407.0 - throughput: 156.07928827844546 + inference_time: 4601.0 + throughput: 217.34405564007824 estimated_peak_memory_range: - min: 4947968 - max: 15455960 + min: 4931584 + max: 64296624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -121,14 +123,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jgke7zlvg + job_id: jpxk7xy95 job_status: Passed torchscript_onnx: - inference_time: 7557.0 - throughput: 132.32764324467382 + inference_time: 5063.0 + throughput: 197.51135690302192 estimated_peak_memory_range: - min: 14303232 - max: 22704960 + min: 16527360 + max: 141460592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,22 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j5we2od45 + job_id: jglv0l3l5 job_status: Passed reference_device_info: - name: Samsung Galaxy S23 - os: '13' + name: Samsung Galaxy S24 + os: '14' form_factor: Phone os_name: Android manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-10-26T22:58:51Z' + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-11-09T21:47:03Z' - torchscript_onnx_tflite: - inference_time: 5606.0 - throughput: 
178.38030681412772 + inference_time: 4509.0 + throughput: 221.77866489243735 estimated_peak_memory_range: - min: 2613248 - max: 117014768 + min: 3186688 + max: 77482800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,14 +161,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jpxknv185 + job_id: jpedryw05 job_status: Passed torchscript_onnx_qnn: - inference_time: 4785.0 - throughput: 208.9864158829676 + inference_time: 4372.0 + throughput: 228.72827081427263 estimated_peak_memory_range: - min: 4931584 - max: 61903616 + min: 4943872 + max: 58019776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j5q6e87ep + job_id: j5mnw83qp job_status: Passed torchscript_onnx: - inference_time: 6002.0 - throughput: 166.61112962345885 + inference_time: 4896.0 + throughput: 204.2483660130719 estimated_peak_memory_range: - min: 4661248 - max: 124929264 + min: 0 + max: 74570432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -189,22 +191,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jg9ljv3mg + job_id: j56y3wn7p job_status: Passed reference_device_info: - name: Samsung Galaxy S24 - os: '14' + name: Snapdragon 8 Elite QRD + os: '15' form_factor: Phone os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-10-26T22:58:52Z' + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-11-09T21:47:04Z' - torchscript_onnx_tflite: - inference_time: 4519.0 - throughput: 221.2878955521133 + inference_time: 6559.0 + throughput: 152.46226558926665 estimated_peak_memory_range: - min: 3026944 - max: 77259616 + min: 1683456 + max: 10658880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -212,14 +214,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jp0z3y105 + job_id: jgz3xnj65 job_status: Passed torchscript_onnx_qnn: - inference_time: 4389.0 - 
throughput: 227.842333105491 + inference_time: 6321.0 + throughput: 158.202816010125 estimated_peak_memory_range: - min: 4927488 - max: 58148960 + min: 4947968 + max: 9065328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -227,37 +229,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jpede9r85 - job_status: Passed - torchscript_onnx: - inference_time: 4869.0 - throughput: 205.38098172109264 - estimated_peak_memory_range: - min: 16568320 - max: 91351408 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 336 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 336 - job_id: jgdxewr6p + job_id: jgn69k3m5 job_status: Passed reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot os_name: Android manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-10-26T22:58:54Z' + chipset: QCS8550 Proxy + timestamp: '2024-11-09T21:46:55Z' - torchscript_onnx_tflite: - inference_time: 6587.0 - throughput: 151.8141794443601 + inference_time: 6473.0 + throughput: 154.4878727019929 estimated_peak_memory_range: - min: 4571136 - max: 7157200 + min: 12288 + max: 183635688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,14 +252,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: j5mnqrz7p + job_id: j5wed43j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6361.0 - throughput: 157.2079861656972 + inference_time: 6295.0 + throughput: 158.85623510722795 estimated_peak_memory_range: - min: 4947968 - max: 11628936 + min: 4980736 + max: 11758592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,22 +267,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jglv6n025 + job_id: jp2k7elmp job_status: Passed reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot + name: SA8255 (Proxy) + os: '13' + form_factor: Auto os_name: Android 
manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-10-26T22:58:43Z' + chipset: SA8255P Proxy + timestamp: '2024-11-09T21:46:57Z' - torchscript_onnx_tflite: - inference_time: 6587.0 - throughput: 151.8141794443601 + inference_time: 6453.0 + throughput: 154.9666821633349 estimated_peak_memory_range: - min: 4591616 - max: 7111472 + min: 4599808 + max: 7180096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -303,14 +290,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jp0z2y095 + job_id: jg9l3dyvg job_status: Passed torchscript_onnx_qnn: - inference_time: 6372.0 - throughput: 156.9365976145637 + inference_time: 6381.0 + throughput: 156.7152483936687 estimated_peak_memory_range: - min: 4964352 - max: 6483624 + min: 5001216 + max: 6327192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,22 +305,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j5q6r8qnp + job_id: jpy14m64p job_status: Passed reference_device_info: - name: SA8255 (Proxy) + name: SA8775 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-10-30T01:00:02Z' + chipset: SA8775P Proxy + timestamp: '2024-11-09T21:46:58Z' - torchscript_onnx_tflite: - inference_time: 6538.0 - throughput: 152.95197308045275 + inference_time: 6424.0 + throughput: 155.6662515566625 estimated_peak_memory_range: - min: 4603904 - max: 216941736 + min: 1019904 + max: 2686336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -341,14 +328,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jp8qmoykp + job_id: jp14d6wlp job_status: Passed torchscript_onnx_qnn: - inference_time: 6283.0 - throughput: 159.15963711602737 + inference_time: 6278.0 + throughput: 159.28639694170118 estimated_peak_memory_range: min: 4964352 - max: 6312328 + max: 6266312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,22 +343,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 333 - job_id: jglv2nmj5 + job_id: jp0z16le5 job_status: Passed reference_device_info: - name: SA8775 (Proxy) + name: SA8650 (Proxy) os: '13' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8775P Proxy - timestamp: '2024-10-30T01:00:03Z' + chipset: SA8650P Proxy + timestamp: '2024-11-09T21:46:59Z' - torchscript_onnx_tflite: - inference_time: 6559.0 - throughput: 152.46226558926665 + inference_time: 10892.0 + throughput: 91.8105031215571 estimated_peak_memory_range: - min: 4595712 - max: 6369272 + min: 4591616 + max: 67789136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -379,14 +366,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jgkeqzxwg + job_id: jgdxr2qlp job_status: Passed torchscript_onnx_qnn: - inference_time: 6399.0 - throughput: 156.2744178777934 + inference_time: 10437.0 + throughput: 95.81297307655457 estimated_peak_memory_range: - min: 4964352 - max: 7071448 + min: 61440 + max: 5655920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,22 +381,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j56yz646p + job_id: jp8q31z8p job_status: Passed reference_device_info: - name: SA8650 (Proxy) - os: '13' + name: SA8295P ADP + os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-10-30T01:00:04Z' + chipset: SA8295P + timestamp: '2024-11-09T21:47:00Z' - torchscript_onnx_tflite: - inference_time: 9714.0 - throughput: 102.94420424130122 + inference_time: 9546.0 + throughput: 104.75591870940708 estimated_peak_memory_range: - min: 4603904 - max: 108737760 + min: 4616192 + max: 107973552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +404,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jpy1red0p + job_id: j57yj9xr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 9163.0 - throughput: 109.13456291607552 + inference_time: 9162.0 + 
throughput: 109.14647456887143 estimated_peak_memory_range: - min: 4931584 - max: 46677184 + min: 4952064 + max: 47018976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +419,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jgjvzx01g + job_id: jgkel83og job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +428,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-10-26T22:58:48Z' + timestamp: '2024-11-09T21:47:01Z' - torchscript_onnx_qnn: - inference_time: 6840.0 - throughput: 146.19883040935673 + inference_time: 6852.0 + throughput: 145.94279042615295 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,14 +442,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j56ye63np + job_id: jprv4weeg job_status: Passed torchscript_onnx: - inference_time: 7919.0 - throughput: 126.27857052658165 + inference_time: 7643.0 + throughput: 130.83867591259977 estimated_peak_memory_range: - min: 17473536 - max: 17473536 + min: 17412096 + max: 17412096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +457,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jp14y0dnp + job_id: jp3j46ezg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +466,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-10-26T22:58:53Z' + timestamp: '2024-11-09T21:47:05Z' diff --git a/qai_hub_models/requirements-dev.txt b/qai_hub_models/requirements-dev.txt index 2a38e235..c8c91d8e 100644 --- a/qai_hub_models/requirements-dev.txt +++ b/qai_hub_models/requirements-dev.txt @@ -19,4 +19,3 @@ keyrings.envvars==1.1.0 wheel==0.44.0 packaging>23,<24 huggingface-hub>=0.23.1,<0.24 -datasets==2.14.5 diff --git a/qai_hub_models/requirements.txt b/qai_hub_models/requirements.txt index 9f618c66..42aac9c6 100644 --- a/qai_hub_models/requirements.txt +++ 
b/qai_hub_models/requirements.txt @@ -17,3 +17,4 @@ torchvision>=0.16.2,<0.21 typing-extensions>=4.12.2 tqdm>=4.66 qai_hub>=0.18.1 +datasets==2.14.5 diff --git a/qai_hub_models/scorecard/device.py b/qai_hub_models/scorecard/device.py index 7c4c82ac..198e33e2 100644 --- a/qai_hub_models/scorecard/device.py +++ b/qai_hub_models/scorecard/device.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- import os -from enum import Enum +from enum import Enum, unique from functools import cached_property from typing import Optional @@ -32,7 +32,7 @@ class ScorecardDevice: def all_devices( cls, enabled: Optional[bool] = None, - supports_fp16: Optional[bool] = None, + supports_fp16_npu: Optional[bool] = None, supports_compile_path: Optional[ScorecardCompilePath] = None, supports_profile_path: Optional[ScorecardProfilePath] = None, ): @@ -46,8 +46,8 @@ def all_devices( if ( (enabled is None or enabled == device.enabled) and ( - supports_fp16 is None - or supports_fp16 == device.supports_fp16_inference + supports_fp16_npu is None + or supports_fp16_npu == device.supports_fp16_npu ) and ( supports_compile_path is None @@ -60,12 +60,21 @@ def all_devices( ) ] + @unique class FormFactor(Enum): phone = 0 - auto = 1 - xr = 2 - compute = 3 - iot = 4 + tablet = 1 + auto = 2 + xr = 3 + compute = 4 + iot = 5 + + @staticmethod + def from_string(string: str) -> "ScorecardDevice.FormFactor": + return ScorecardDevice.FormFactor[string.lower()] + + def __str__(self): + return self.name def __init__( self, @@ -75,6 +84,7 @@ def __init__( disabled_models: list[str] = [], compile_paths: Optional[list[ScorecardCompilePath]] = None, profile_paths: Optional[list[ScorecardProfilePath]] = None, + supports_fp16_npu: Optional[bool] = None, public: bool = True, ): """ @@ -94,6 +104,8 @@ def __init__( profile_paths: The set of profile paths valid for this device. 
If unset, will use the default set of paths for this device's form factor. + supports_fp16_npu: Whether this device supports FP16 on its NPU. If unset, the hexagon version is used as a default heuristic. + public: Whether this device is publicly available. """ if name in ScorecardDevice._registry: @@ -105,6 +117,7 @@ def __init__( self.execution_device_name = execution_device_name self._compile_paths = compile_paths self._profile_paths = profile_paths + self._supports_fp16_npu = supports_fp16_npu self.public = public ScorecardDevice._registry[name] = self @@ -150,7 +163,7 @@ def reference_device(self) -> hub.Device: Get the "reference" device used by the scorecard for metadata when collating results. This is not used by any actual scorecard jobs. """ - if self.reference_device_name: + if self.reference_device_name is not None: return _get_cached_device(self.reference_device_name) raise NotImplementedError(f"No reference device for {self.name}") @@ -160,7 +173,7 @@ def execution_device(self) -> hub.Device: Get the "reference" device used by the scorecard for metadata when collating results. This is not used by any actual scorecard jobs. """ - if self.execution_device_name: + if self.execution_device_name is not None: return _get_cached_device(self.execution_device_name) raise NotImplementedError(f"No execution device for {self.name}") @@ -210,7 +223,12 @@ def hexagon_version(self) -> int: raise ValueError(f"Hexagon version not found for device: {self.name}") @property - def supports_fp16_inference(self) -> bool: + def supports_fp16_npu(self) -> bool: + """ + Whether this device's NPU supports FP16 inference. 
+ """ + if self._supports_fp16_npu is not None: + return self._supports_fp16_npu return self.hexagon_version >= 69 @cached_property @@ -229,11 +247,14 @@ def supported_runtimes(self) -> list[TargetRuntime]: @cached_property def profile_paths(self) -> list[ScorecardProfilePath]: - if self._profile_paths: + if self._profile_paths is not None: return self._profile_paths paths: list[ScorecardProfilePath] - if self.form_factor == ScorecardDevice.FormFactor.phone: + if self.form_factor in [ + ScorecardDevice.FormFactor.phone, + ScorecardDevice.FormFactor.tablet, + ]: paths = [ ScorecardProfilePath.ONNX, ScorecardProfilePath.QNN, @@ -247,6 +268,7 @@ def profile_paths(self) -> list[ScorecardProfilePath]: paths = [ ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU, + ScorecardProfilePath.ONNX_DML_NPU, ScorecardProfilePath.QNN, ] elif self.form_factor == ScorecardDevice.FormFactor.iot: @@ -260,7 +282,7 @@ def profile_paths(self) -> list[ScorecardProfilePath]: @cached_property def compile_paths(self) -> list[ScorecardCompilePath]: - if self._compile_paths: + if self._compile_paths is not None: return self._compile_paths if ScorecardProfilePath.QNN in self.profile_paths: @@ -375,6 +397,7 @@ def compile_paths(self) -> list[ScorecardCompilePath]: cs_auto_makena_8295 = ScorecardDevice( name="cs_auto_makena_8295", reference_device_name="SA8295P ADP", + supports_fp16_npu=True, ) diff --git a/qai_hub_models/scorecard/execution_helpers.py b/qai_hub_models/scorecard/execution_helpers.py index f1549d36..4a64c512 100644 --- a/qai_hub_models/scorecard/execution_helpers.py +++ b/qai_hub_models/scorecard/execution_helpers.py @@ -2,6 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from __future__ import annotations + from dataclasses import dataclass from typing import Optional @@ -39,12 +41,15 @@ def get_compile_parameterized_pytest_config( """ Get a pytest parameterization list of all enabled (device, compile path) pairs. """ - path_list: list[ScorecardCompilePath] = ScorecardCompilePath.all_enabled() # type: ignore + path_list: list[ScorecardCompilePath] = ScorecardCompilePath.all_compile_paths( + enabled=True, supports_quantization=model_is_quantized or None + ) + needs_fp16 = not model_is_quantized path_devices_dict = { sc_path: ScorecardDevice.all_devices( enabled=True, - supports_fp16=(True if needs_fp16 else None), + supports_fp16_npu=(True if needs_fp16 else None), supports_compile_path=sc_path, ) for sc_path in path_list @@ -59,16 +64,19 @@ def get_compile_parameterized_pytest_config( def get_profile_parameterized_pytest_config( model_is_quantized: bool = False, -) -> list[tuple["ScorecardProfilePath", ScorecardDevice]]: +) -> list[tuple[ScorecardProfilePath, ScorecardDevice]]: """ Get a pytest parameterization list of all enabled (device, profile path) pairs. 
""" - path_list: list[ScorecardProfilePath] = ScorecardProfilePath.all_enabled() # type: ignore + path_list: list[ScorecardProfilePath] = ScorecardProfilePath.all_profile_paths( + enabled=True, supports_quantization=model_is_quantized or None + ) needs_fp16 = not model_is_quantized + path_devices_dict = { sc_path: ScorecardDevice.all_devices( enabled=True, - supports_fp16=(True if needs_fp16 else None), + supports_fp16_npu=(True if needs_fp16 else None), supports_profile_path=sc_path, ) for sc_path in path_list @@ -103,6 +111,37 @@ def get_async_job_cache_name( ) +def get_async_job_id( + cache: dict[str, str], + path: ScorecardCompilePath | ScorecardProfilePath | TargetRuntime, + model_id: str, + device: ScorecardDevice, + component: Optional[str] = None, + fallback_to_universal_device: bool = False, +) -> str | None: + """ + Get the ID of this job in the YAML that stores asyncronously-ran scorecard jobs. + Returns None if job does not exist. + + parameters: + path: Applicable scorecard path + model_id: The ID of the QAIHM model being tested + device: The targeted device + component: The name of the model component being tested, if applicable + fallback_to_universal_device: Return a job that ran with the universal device if a job + using the provided device is not available. + """ + if x := cache.get(get_async_job_cache_name(path, model_id, device, component)): + return x + + if fallback_to_universal_device: + return cache.get( + get_async_job_cache_name(path, model_id, cs_universal, component) + ) + + return None + + def _on_staging() -> bool: """ Returns whether the hub client is pointing to staging. 
diff --git a/qai_hub_models/scorecard/intermediates/compile-jobs.yaml b/qai_hub_models/scorecard/intermediates/compile-jobs.yaml new file mode 100644 index 00000000..0249c9ba --- /dev/null +++ b/qai_hub_models/scorecard/intermediates/compile-jobs.yaml @@ -0,0 +1,1512 @@ +yolov8_seg_TFLITE: jgn69xkq5 +yolov8_seg_QNN: jprv49w7g +yolov8_seg_QNN-cs_8_elite: jp2k7jeqp +yolov8_seg_QNN-cs_8550: jpy14nmlp +yolov8_seg_QNN-cs_x_elite: jp0z1k6n5 +yolov8_seg_QNN-cs_auto_lemans_8255: jp8q381op +yolov8_seg_QNN-cs_auto_lemans_8775: jgkeld8ng +yolov8_seg_QNN-cs_auto_lemans_8650: j5q67wvop +yolov8_seg_QNN-cs_auto_makena_8295: jglv07xm5 +yolov8_seg_QNN-cs_xr_8450: j56y3v7yp +yolov8_seg_ONNX: jp3j489ng +yolov8_seg_ONNX_FP16: jgo21mrkp +yolov8_det_quantized_TFLITE: jpv614dr5 +yolov8_det_quantized_QNN: jgjv017eg +yolov8_det_quantized_QNN-cs_8_elite: jpedr2zv5 +yolov8_det_quantized_QNN-cs_6490: jgz3xwmx5 +yolov8_det_quantized_QNN-cs_8550: j5wedx7m5 +yolov8_det_quantized_QNN-cs_x_elite: jg9l38m8g +yolov8_det_quantized_QNN-cs_auto_lemans_8255: jp14d3j7p +yolov8_det_quantized_QNN-cs_auto_lemans_8775: jgdxr03zp +yolov8_det_quantized_QNN-cs_auto_lemans_8650: j57yj6495 +yolov8_det_quantized_QNN-cs_auto_makena_8295: jp4lx8115 +yolov8_det_quantized_QNN-cs_xr_8450: jpxk7m4l5 +yolov8_det_quantized_ONNX: j5mnw4m9p +yolov8_det_TFLITE: jgn69xnq5 +yolov8_det_QNN: jprv4907g +yolov8_det_QNN-cs_8_elite: jp2k7jwqp +yolov8_det_QNN-cs_8550: jpy14nxlp +yolov8_det_QNN-cs_x_elite: jp0z1kjn5 +yolov8_det_QNN-cs_auto_lemans_8255: jp8q38xop +yolov8_det_QNN-cs_auto_lemans_8775: jgkeld4ng +yolov8_det_QNN-cs_auto_lemans_8650: j5q67wyop +yolov8_det_QNN-cs_auto_makena_8295: jglv07ym5 +yolov8_det_QNN-cs_xr_8450: j56y3v8yp +yolov8_det_ONNX: jp3j48zng +yolov8_det_ONNX_FP16: jgo21mlkp +yolov7_quantized_TFLITE: jpv614lr5 +yolov7_quantized_QNN: jgjv01reg +yolov7_quantized_QNN-cs_8_elite: jpedr27v5 +yolov7_quantized_QNN-cs_6490: jgz3xwlx5 +yolov7_quantized_QNN-cs_8550: j5wedxlm5 +yolov7_quantized_QNN-cs_x_elite: jg9l38z8g 
+yolov7_quantized_QNN-cs_auto_lemans_8255: jp14d3n7p +yolov7_quantized_QNN-cs_auto_lemans_8775: jgdxr0dzp +yolov7_quantized_QNN-cs_auto_lemans_8650: j57yj6e95 +yolov7_quantized_QNN-cs_auto_makena_8295: jp4lx8y15 +yolov7_quantized_QNN-cs_xr_8450: jpxk7mll5 +yolov7_quantized_ONNX: j5mnw409p +yolov7_TFLITE: jgn69xzq5 +yolov7_QNN: jprv49l7g +yolov7_QNN-cs_8_elite: jp2k7jrqp +yolov7_QNN-cs_8550: jpy14nolp +yolov7_QNN-cs_x_elite: jp0z1kmn5 +yolov7_QNN-cs_auto_lemans_8255: jp8q38eop +yolov7_QNN-cs_auto_lemans_8775: jgkeld2ng +yolov7_QNN-cs_auto_lemans_8650: j5q67wlop +yolov7_QNN-cs_auto_makena_8295: jglv07wm5 +yolov7_QNN-cs_xr_8450: j56y3voyp +yolov7_ONNX: jp3j48ong +yolov7_ONNX_FP16: jgo21mdkp +yolov6_TFLITE: jpv6142r5 +yolov6_QNN: jgjv013eg +yolov6_QNN-cs_8_elite: jpedr26v5 +yolov6_QNN-cs_8550: jgz3xwzx5 +yolov6_QNN-cs_x_elite: j5wedxym5 +yolov6_QNN-cs_auto_lemans_8255: jg9l38o8g +yolov6_QNN-cs_auto_lemans_8775: jp14d3o7p +yolov6_QNN-cs_auto_lemans_8650: jgdxr06zp +yolov6_QNN-cs_auto_makena_8295: j5wedxy45 +yolov6_QNN-cs_xr_8450: jg9l38omg +yolov6_ONNX: jp14d3onp +yolov6_ONNX_FP16: jgdxr066p +yolov11_det_TFLITE: j57yj6on5 +yolov11_det_QNN: jp4lx8e25 +yolov11_det_QNN-cs_8_elite: jpxk7m085 +yolov11_det_QNN-cs_8550: j5mnw497p +yolov11_det_QNN-cs_x_elite: jgn69x1j5 +yolov11_det_QNN-cs_auto_lemans_8255: jprv49xkg +yolov11_det_QNN-cs_auto_lemans_8775: jp2k7jo6p +yolov11_det_QNN-cs_auto_lemans_8650: jpy14n80p +yolov11_det_QNN-cs_auto_makena_8295: jp0z1k905 +yolov11_det_QNN-cs_xr_8450: jp8q38rqp +yolov11_det_ONNX: jgkeld0vg +yolov11_det_ONNX_FP16: j5q67w1ep +yolonas_quantized_TFLITE: j56y3vmnp +yolonas_quantized_QNN: jp3j487mg +yolonas_quantized_QNN-cs_8_elite: jgo21mw1p +yolonas_quantized_QNN-cs_6490: jpv614mz5 +yolonas_quantized_QNN-cs_8550: jgjv01y1g +yolonas_quantized_QNN-cs_x_elite: jpedr2x85 +yolonas_quantized_QNN-cs_auto_lemans_8255: jgz3xwy45 +yolonas_quantized_QNN-cs_auto_lemans_8775: j5wedxz45 +yolonas_quantized_QNN-cs_auto_lemans_8650: jg9l382mg 
+yolonas_quantized_QNN-cs_auto_makena_8295: jp14d31np +yolonas_quantized_QNN-cs_xr_8450: jgdxr046p +yolonas_quantized_ONNX: j57yj6nn5 +yolonas_TFLITE: jp4lx8425 +yolonas_QNN: jpxk7mr85 +yolonas_QNN-cs_8_elite: j5mnw4k7p +yolonas_QNN-cs_8550: jgn69xqj5 +yolonas_QNN-cs_x_elite: jprv49dkg +yolonas_QNN-cs_auto_lemans_8255: jp2k7jd6p +yolonas_QNN-cs_auto_lemans_8775: jpy14n20p +yolonas_QNN-cs_auto_lemans_8650: jp0z1kx05 +yolonas_QNN-cs_auto_makena_8295: jp8q38kqp +yolonas_QNN-cs_xr_8450: jgkeldkvg +yolonas_ONNX: j5q67wdep +yolonas_ONNX_FP16: jglv07q25 +xlsr_TFLITE: j56y3v0np +xlsr_QNN: jp3j48rmg +xlsr_QNN-cs_8_elite: jgo21m91p +xlsr_QNN-cs_8550: jpv614nz5 +xlsr_QNN-cs_x_elite: jgjv0181g +xlsr_QNN-cs_auto_lemans_8255: jpedr2n85 +xlsr_QNN-cs_auto_lemans_8775: jgz3xw045 +xlsr_QNN-cs_auto_lemans_8650: j5wedxr45 +xlsr_QNN-cs_auto_makena_8295: jg9l38qmg +xlsr_QNN-cs_xr_8450: jp14d3mnp +xlsr_ONNX: jgdxr0m6p +xlsr_ONNX_FP16: j57yj68n5 +wideresnet50_TFLITE: jp4lx8225 +wideresnet50_QNN: jpxk7mz85 +wideresnet50_QNN-cs_8_elite: j5mnw4l7p +wideresnet50_QNN-cs_8550: jgn69xwj5 +wideresnet50_QNN-cs_x_elite: jprv497kg +wideresnet50_QNN-cs_auto_lemans_8255: jp2k7jz6p +wideresnet50_QNN-cs_auto_lemans_8775: jpy14ny0p +wideresnet50_QNN-cs_auto_lemans_8650: jp0z1k805 +wideresnet50_QNN-cs_auto_makena_8295: jp8q38dqp +wideresnet50_QNN-cs_xr_8450: jgkeldwvg +wideresnet50_ONNX: j5q67wxep +wideresnet50_ONNX_FP16: jglv07925 +whisper_tiny_en_TFLITE_WhisperEncoder: j56y3v9np +whisper_tiny_en_TFLITE_WhisperDecoder: jp3j48lmg +whisper_tiny_en_QNN_WhisperEncoder: jgo21m71p +whisper_tiny_en_QNN_WhisperDecoder: jpv614yz5 +whisper_tiny_en_QNN-cs_8_elite_WhisperEncoder: jgjv0161g +whisper_tiny_en_QNN-cs_8_elite_WhisperDecoder: jpedr2085 +whisper_tiny_en_QNN-cs_8550_WhisperEncoder: jgz3xwq45 +whisper_tiny_en_QNN-cs_8550_WhisperDecoder: j5wedx045 +whisper_tiny_en_QNN-cs_x_elite_WhisperEncoder: jg9l387mg +whisper_tiny_en_QNN-cs_x_elite_WhisperDecoder: jp14d3knp 
+whisper_tiny_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jgdxr0y6p +whisper_tiny_en_QNN-cs_auto_lemans_8255_WhisperDecoder: j57yj61n5 +whisper_tiny_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jp4lx8625 +whisper_tiny_en_QNN-cs_auto_lemans_8775_WhisperDecoder: jpxk7m885 +whisper_tiny_en_QNN-cs_auto_lemans_8650_WhisperEncoder: j5mnw417p +whisper_tiny_en_QNN-cs_auto_lemans_8650_WhisperDecoder: jgn69xdj5 +whisper_tiny_en_QNN-cs_auto_makena_8295_WhisperEncoder: jprv49mkg +whisper_tiny_en_QNN-cs_auto_makena_8295_WhisperDecoder: jp2k7jq6p +whisper_tiny_en_QNN-cs_xr_8450_WhisperEncoder: jpy14nk0p +whisper_tiny_en_QNN-cs_xr_8450_WhisperDecoder: jp0z1kw05 +whisper_tiny_en_ONNX_WhisperEncoder: jp8q38nqp +whisper_tiny_en_ONNX_WhisperDecoder: jgkeld1vg +whisper_tiny_en_ONNX_FP16_WhisperEncoder: j5q67wnep +whisper_tiny_en_ONNX_FP16_WhisperDecoder: jglv07d25 +whisper_small_en_TFLITE_WhisperEncoder: j56y3vxnp +whisper_small_en_TFLITE_WhisperDecoder: jp3j48dmg +whisper_small_en_QNN_WhisperEncoder: jgo21mx1p +whisper_small_en_QNN_WhisperDecoder: jpv6148z5 +whisper_small_en_QNN-cs_8_elite_WhisperEncoder: jgjv0191g +whisper_small_en_QNN-cs_8_elite_WhisperDecoder: jpedr2q85 +whisper_small_en_QNN-cs_8550_WhisperEncoder: jgz3xw645 +whisper_small_en_QNN-cs_8550_WhisperDecoder: j5wedxk45 +whisper_small_en_QNN-cs_x_elite_WhisperEncoder: jg9l38rmg +whisper_small_en_QNN-cs_x_elite_WhisperDecoder: jp14d39np +whisper_small_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jgdxr0k6p +whisper_small_en_QNN-cs_auto_lemans_8255_WhisperDecoder: j5wedxkz5 +whisper_small_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jg9l38rqg +whisper_small_en_QNN-cs_auto_lemans_8775_WhisperDecoder: jp14d39kp +whisper_small_en_QNN-cs_auto_lemans_8650_WhisperEncoder: jgdxr0kkp +whisper_small_en_QNN-cs_auto_lemans_8650_WhisperDecoder: j57yj6mq5 +whisper_small_en_QNN-cs_auto_makena_8295_WhisperEncoder: jp4lx87q5 +whisper_small_en_QNN-cs_auto_makena_8295_WhisperDecoder: jpxk7mqj5 +whisper_small_en_QNN-cs_xr_8450_WhisperEncoder: 
j5mnw47yp +whisper_small_en_QNN-cs_xr_8450_WhisperDecoder: jgn69xjv5 +whisper_small_en_ONNX_WhisperEncoder: jprv49zvg +whisper_small_en_ONNX_WhisperDecoder: jp2k7j2xp +whisper_small_en_ONNX_FP16_WhisperEncoder: jpy14n9rp +whisper_small_en_ONNX_FP16_WhisperDecoder: jp0z1kn25 +whisper_base_en_TFLITE_WhisperEncoder: jp8q38lzp +whisper_base_en_TFLITE_WhisperDecoder: jgkeldjyg +whisper_base_en_QNN_WhisperEncoder: j5q67wj7p +whisper_base_en_QNN_WhisperDecoder: jglv07je5 +whisper_base_en_QNN-cs_8_elite_WhisperEncoder: j56y3vkvp +whisper_base_en_QNN-cs_8_elite_WhisperDecoder: jp3j48yxg +whisper_base_en_QNN-cs_8550_WhisperEncoder: jgo21mj4p +whisper_base_en_QNN-cs_8550_WhisperDecoder: jpv614j75 +whisper_base_en_QNN-cs_x_elite_WhisperEncoder: jgjv01j7g +whisper_base_en_QNN-cs_x_elite_WhisperDecoder: jpedr2j75 +whisper_base_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jgz3xw1z5 +whisper_base_en_QNN-cs_auto_lemans_8255_WhisperDecoder: j5wedxjz5 +whisper_base_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jg9l386qg +whisper_base_en_QNN-cs_auto_lemans_8775_WhisperDecoder: jp14d3rkp +whisper_base_en_QNN-cs_auto_lemans_8650_WhisperEncoder: jgdxr0jkp +whisper_base_en_QNN-cs_auto_lemans_8650_WhisperDecoder: j57yj6qq5 +whisper_base_en_QNN-cs_auto_makena_8295_WhisperEncoder: jp4lx8zq5 +whisper_base_en_QNN-cs_auto_makena_8295_WhisperDecoder: jpxk7mwj5 +whisper_base_en_QNN-cs_xr_8450_WhisperEncoder: j5mnw4jyp +whisper_base_en_QNN-cs_xr_8450_WhisperDecoder: jgn69x2v5 +whisper_base_en_ONNX_WhisperEncoder: jprv49kvg +whisper_base_en_ONNX_WhisperDecoder: jp2k7j8xp +whisper_base_en_ONNX_FP16_WhisperEncoder: jpy14nerp +whisper_base_en_ONNX_FP16_WhisperDecoder: jp0z1ky25 +vit_TFLITE: jp8q38ozp +vit_QNN: jgkeldzyg +vit_QNN-cs_8_elite: j5q67w87p +vit_QNN-cs_8550: jglv07ne5 +vit_QNN-cs_x_elite: j56y3v6vp +vit_QNN-cs_auto_lemans_8255: jp3j48kxg +vit_QNN-cs_auto_lemans_8775: jgo21my4p +vit_QNN-cs_auto_lemans_8650: jpv614375 +vit_QNN-cs_auto_makena_8295: jgjv01x7g +vit_QNN-cs_xr_8450: jpedr2975 +vit_ONNX: 
jgz3xwez5 +vit_ONNX_FP16: j5wedxoz5 +unet_segmentation_TFLITE: jp14d30kp +unet_segmentation_QNN: jgdxr0wkp +unet_segmentation_QNN-cs_8_elite: j57yj6zq5 +unet_segmentation_QNN-cs_8550: jp4lx8qq5 +unet_segmentation_QNN-cs_x_elite: jpxk7mvj5 +unet_segmentation_QNN-cs_auto_lemans_8255: j5mnw4ryp +unet_segmentation_QNN-cs_auto_lemans_8775: jgn69xyv5 +unet_segmentation_QNN-cs_auto_lemans_8650: jprv49qvg +unet_segmentation_QNN-cs_auto_makena_8295: jp2k7j6xp +unet_segmentation_QNN-cs_xr_8450: jpy14nwrp +unet_segmentation_ONNX: jp0z1kq25 +unet_segmentation_ONNX_FP16: jp8q389zp +trocr_TFLITE_TrOCREncoder: j5q67wk7p +trocr_TFLITE_TrOCRDecoder: jglv07ze5 +trocr_QNN_TrOCREncoder: j56y3vjvp +trocr_QNN_TrOCRDecoder: jp3j483xg +trocr_QNN-cs_8_elite_TrOCREncoder: jgo21m04p +trocr_QNN-cs_8_elite_TrOCRDecoder: jpv614o75 +trocr_QNN-cs_8550_TrOCREncoder: jgjv01m7g +trocr_QNN-cs_8550_TrOCRDecoder: jpedr2175 +trocr_QNN-cs_x_elite_TrOCREncoder: jgz3xw9z5 +trocr_QNN-cs_x_elite_TrOCRDecoder: j5wedxvz5 +trocr_QNN-cs_auto_lemans_8255_TrOCREncoder: jg9l381qg +trocr_QNN-cs_auto_lemans_8255_TrOCRDecoder: jp14d3lkp +trocr_QNN-cs_auto_lemans_8775_TrOCREncoder: jgdxr09kp +trocr_QNN-cs_auto_lemans_8775_TrOCRDecoder: j57yj6wq5 +trocr_QNN-cs_auto_lemans_8650_TrOCREncoder: jp4lx8oq5 +trocr_QNN-cs_auto_lemans_8650_TrOCRDecoder: jpxk7mjj5 +trocr_QNN-cs_auto_makena_8295_TrOCREncoder: j5mnw42yp +trocr_QNN-cs_auto_makena_8295_TrOCRDecoder: jgn69x8v5 +trocr_QNN-cs_xr_8450_TrOCREncoder: jprv49jvg +trocr_QNN-cs_xr_8450_TrOCRDecoder: jp2k7jnxp +trocr_ONNX_TrOCREncoder: jpy14n0rp +trocr_ONNX_TrOCRDecoder: jp0z1k725 +trocr_ONNX_FP16_TrOCREncoder: jp8q38vzp +trocr_ONNX_FP16_TrOCRDecoder: jgkeldmyg +swin_tiny_TFLITE: j5q67wo7p +swin_tiny_QNN: jglv07re5 +swin_tiny_QNN-cs_8_elite: j56y3vlvp +swin_tiny_QNN-cs_8550: jp3j482xg +swin_tiny_QNN-cs_x_elite: jgo21mq4p +swin_tiny_QNN-cs_auto_lemans_8255: jpv614x75 +swin_tiny_QNN-cs_auto_lemans_8775: jgjv0147g +swin_tiny_QNN-cs_auto_lemans_8650: jpedr2375 
+swin_tiny_QNN-cs_auto_makena_8295: jgz3xwkz5 +swin_tiny_QNN-cs_xr_8450: j5wedxnz5 +swin_tiny_ONNX: jg9l38eqg +swin_tiny_ONNX_FP16: jp14d3xkp +swin_small_TFLITE: jgdxr0lkp +swin_small_QNN: j5wedxnj5 +swin_small_QNN-cs_8_elite: jg9l38evg +swin_small_QNN-cs_8550: jp14d3xlp +swin_small_QNN-cs_x_elite: jgdxr0llp +swin_small_QNN-cs_auto_lemans_8255: j57yjkrr5 +swin_small_QNN-cs_auto_lemans_8775: jp4lxmrl5 +swin_small_QNN-cs_auto_lemans_8650: jpxk73o95 +swin_small_QNN-cs_auto_makena_8295: j5mnwoxqp +swin_small_QNN-cs_xr_8450: jgn69ovm5 +swin_small_ONNX: jprv4o3eg +swin_small_ONNX_FP16: jp2k74ymp +swin_base_TFLITE: jpy14q34p +swin_base_QNN: jp0z1d0e5 +swin_base_QNN-cs_8_elite: jp8q36y8p +swin_base_QNN-cs_8550: jgkeloxog +swin_base_QNN-cs_x_elite: j5q67zqmp +swin_base_QNN-cs_auto_lemans_8255: jglv0oml5 +swin_base_QNN-cs_auto_lemans_8775: j56y3r47p +swin_base_QNN-cs_auto_lemans_8650: jp3j4x0zg +swin_base_QNN-cs_auto_makena_8295: jgo21o6dp +swin_base_QNN-cs_xr_8450: jpv61ekm5 +swin_base_ONNX: jgjv0on8g +swin_base_ONNX_FP16: jpedr8m05 +squeezenet1_1_TFLITE: j5wed86j5 +squeezenet1_1_QNN: jg9l3knvg +squeezenet1_1_QNN-cs_8_elite: jp14d7zlp +squeezenet1_1_QNN-cs_8550: jgdxr81lp +squeezenet1_1_QNN-cs_x_elite: j57yjkyr5 +squeezenet1_1_QNN-cs_auto_lemans_8255: jp4lxmll5 +squeezenet1_1_QNN-cs_auto_lemans_8775: jpxk73k95 +squeezenet1_1_QNN-cs_auto_lemans_8650: j5mnwonqp +squeezenet1_1_QNN-cs_auto_makena_8295: jgn69o6m5 +squeezenet1_1_QNN-cs_xr_8450: jprv4oveg +squeezenet1_1_ONNX: jp2k74kmp +squeezenet1_1_ONNX_FP16: jpy14q14p +sinet_TFLITE: jp0z1dze5 +sinet_QNN: jp8q36q8p +sinet_QNN-cs_8_elite: jgkeloeog +sinet_QNN-cs_8550: j5q67z6mp +sinet_QNN-cs_x_elite: jglv0ovl5 +sinet_QNN-cs_auto_lemans_8255: j56y3ry7p +sinet_QNN-cs_auto_lemans_8775: jp3j4xjzg +sinet_QNN-cs_auto_lemans_8650: jgo21o2dp +sinet_QNN-cs_auto_makena_8295: jpv61e6m5 +sinet_QNN-cs_xr_8450: jgjv0ov8g +sinet_ONNX: jpedr8d05 +sinet_ONNX_FP16: jgz3x8365 +shufflenet_v2_TFLITE: j5wed8ej5 +shufflenet_v2_QNN: jg9l3klvg 
+shufflenet_v2_QNN-cs_8_elite: jp14d74lp +shufflenet_v2_QNN-cs_8550: jgdxr8xlp +shufflenet_v2_QNN-cs_x_elite: j57yjk2r5 +shufflenet_v2_QNN-cs_auto_lemans_8255: jp4lxmnl5 +shufflenet_v2_QNN-cs_auto_lemans_8775: jpxk73995 +shufflenet_v2_QNN-cs_auto_lemans_8650: j5mnwoeqp +shufflenet_v2_QNN-cs_auto_makena_8295: jgn69o0m5 +shufflenet_v2_QNN-cs_xr_8450: jprv4o6eg +shufflenet_v2_ONNX: jp2k74xmp +shufflenet_v2_ONNX_FP16: jpy14qz4p +sesr_m5_TFLITE: jp0z1d4e5 +sesr_m5_QNN: jp8q3628p +sesr_m5_QNN-cs_8_elite: jgkelovog +sesr_m5_QNN-cs_8550: j5q67z0mp +sesr_m5_QNN-cs_x_elite: jglv0o4l5 +sesr_m5_QNN-cs_auto_lemans_8255: j56y3r27p +sesr_m5_QNN-cs_auto_lemans_8775: jp3j4xnzg +sesr_m5_QNN-cs_auto_lemans_8650: jgo21ozdp +sesr_m5_QNN-cs_auto_makena_8295: jpv61eqm5 +sesr_m5_QNN-cs_xr_8450: jgjv0od8g +sesr_m5_ONNX: jpedr8o05 +sesr_m5_ONNX_FP16: jgz3x8265 +sam_TFLITE_SAMDecoder: j5wed8wj5 +sam_TFLITE_SAMEncoder: jg9l3k0vg +sam_QNN_SAMDecoder: jp14d72lp +sam_QNN_SAMEncoder: jgdxr8nlp +sam_QNN-cs_8_elite_SAMDecoder: j57yjk0r5 +sam_QNN-cs_8_elite_SAMEncoder: jp4lxmkl5 +sam_QNN-cs_8550_SAMDecoder: jpxk73n95 +sam_QNN-cs_8550_SAMEncoder: j5mnwoqqp +sam_QNN-cs_x_elite_SAMDecoder: jgn69olm5 +sam_QNN-cs_x_elite_SAMEncoder: jprv4o8eg +sam_QNN-cs_auto_lemans_8255_SAMDecoder: jp2k740mp +sam_QNN-cs_auto_lemans_8255_SAMEncoder: jpy14qr4p +sam_QNN-cs_auto_lemans_8775_SAMDecoder: jp0z1d3e5 +sam_QNN-cs_auto_lemans_8775_SAMEncoder: jp8q3608p +sam_QNN-cs_auto_lemans_8650_SAMDecoder: jgkelo7og +sam_QNN-cs_auto_lemans_8650_SAMEncoder: j5q67zemp +sam_QNN-cs_auto_makena_8295_SAMDecoder: jglv0o6l5 +sam_QNN-cs_auto_makena_8295_SAMEncoder: j56y3re7p +sam_QNN-cs_xr_8450_SAMDecoder: jp3j4xvzg +sam_QNN-cs_xr_8450_SAMEncoder: jpv61e0m5 +sam_ONNX_SAMDecoder: jgjv0oz8g +sam_ONNX_SAMEncoder: jpedr8e05 +sam_ONNX_FP16_SAMDecoder: jgz3x8o65 +sam_ONNX_FP16_SAMEncoder: j5wed82j5 +resnext50_TFLITE: jg9l3kjvg +resnext50_QNN: jp14d7ylp +resnext50_QNN-cs_8_elite: jgdxr8elp +resnext50_QNN-cs_8550: j5wed8365 
+resnext50_QNN-cs_x_elite: jg9l3kylg +resnext50_QNN-cs_auto_lemans_8255: jp14d7w2p +resnext50_QNN-cs_auto_lemans_8775: jgdxr8qep +resnext50_QNN-cs_auto_lemans_8650: j57yjkll5 +resnext50_QNN-cs_auto_makena_8295: jp4lxmdv5 +resnext50_QNN-cs_xr_8450: jpxk73615 +resnext50_ONNX: j5mnwo6wp +resnext50_ONNX_FP16: jgn69omr5 +resnext101_TFLITE: jprv4o29g +resnext101_QNN: jp2k7494p +resnext101_QNN-cs_8_elite: jpy14qj7p +resnext101_QNN-cs_8550: jp0z1d265 +resnext101_QNN-cs_x_elite: jp8q36mxp +resnext101_QNN-cs_auto_lemans_8255: jgkeloq2g +resnext101_QNN-cs_auto_lemans_8775: j5q67zr4p +resnext101_QNN-cs_auto_lemans_8650: jglv0o285 +resnext101_QNN-cs_auto_makena_8295: j56y3rz0p +resnext101_QNN-cs_xr_8450: jp3j4x1lg +resnext101_ONNX: jgo21onxp +resnext101_ONNX_FP16: jpv61erj5 +resnet50_TFLITE: jgjv0o2xg +resnet50_QNN: jpedr8w15 +resnet50_QNN-cs_8_elite: jgz3x8jk5 +resnet50_QNN-cs_8550: j5wed8q65 +resnet50_QNN-cs_x_elite: jg9l3kwlg +resnet50_QNN-cs_auto_lemans_8255: jp14d7e2p +resnet50_QNN-cs_auto_lemans_8775: jgdxr8oep +resnet50_QNN-cs_auto_lemans_8650: j57yjkxl5 +resnet50_QNN-cs_auto_makena_8295: jp4lxmvv5 +resnet50_QNN-cs_xr_8450: jpxk73y15 +resnet50_ONNX: j5mnwo3wp +resnet50_ONNX_FP16: jgn69o3r5 +resnet18_TFLITE: jprv4oe9g +resnet18_QNN: jp2k74l4p +resnet18_QNN-cs_8_elite: jpy14q67p +resnet18_QNN-cs_8550: jp0z1dl65 +resnet18_QNN-cs_x_elite: jp8q36zxp +resnet18_QNN-cs_auto_lemans_8255: jgkelo32g +resnet18_QNN-cs_auto_lemans_8775: j5q67z34p +resnet18_QNN-cs_auto_lemans_8650: jglv0o385 +resnet18_QNN-cs_auto_makena_8295: j56y3rn0p +resnet18_QNN-cs_xr_8450: jp3j4xelg +resnet18_ONNX: jgo21o3xp +resnet18_ONNX_FP16: jpv61evj5 +resnet101_TFLITE: jgjv0oexg +resnet101_QNN: jpedr8k15 +resnet101_QNN-cs_8_elite: jgz3x8rk5 +resnet101_QNN-cs_8550: j5wed8965 +resnet101_QNN-cs_x_elite: jg9l3k4lg +resnet101_QNN-cs_auto_lemans_8255: jp14d782p +resnet101_QNN-cs_auto_lemans_8775: jgdxr8vep +resnet101_QNN-cs_auto_lemans_8650: j57yjkdl5 +resnet101_QNN-cs_auto_makena_8295: jp4lxmwv5 
+resnet101_QNN-cs_xr_8450: jpxk73115 +resnet101_ONNX: j5mnwozwp +resnet101_ONNX_FP16: jgn69oer5 +regnet_TFLITE: jp2k74m4p +regnet_QNN: jpy14qd7p +regnet_QNN-cs_8_elite: jp0z1dr65 +regnet_QNN-cs_8550: jp8q367xp +regnet_QNN-cs_x_elite: jgkeloy2g +regnet_QNN-cs_auto_lemans_8255: j5q67z24p +regnet_QNN-cs_auto_lemans_8775: jglv0ok85 +regnet_QNN-cs_auto_lemans_8650: j56y3r10p +regnet_QNN-cs_auto_makena_8295: jp3j4xmlg +regnet_QNN-cs_xr_8450: jgo21ovxp +regnet_ONNX: jpv61ewj5 +regnet_ONNX_FP16: jgjv0olxg +real_esrgan_x4plus_TFLITE: jpedr8v15 +real_esrgan_x4plus_QNN: jgz3x87k5 +real_esrgan_x4plus_QNN-cs_8_elite: j5wed8d65 +real_esrgan_x4plus_QNN-cs_8550: jg9l3k3lg +real_esrgan_x4plus_QNN-cs_x_elite: jp14d7d2p +real_esrgan_x4plus_QNN-cs_auto_lemans_8255: jgdxr8rep +real_esrgan_x4plus_QNN-cs_auto_lemans_8775: j57yjkjl5 +real_esrgan_x4plus_QNN-cs_auto_lemans_8650: jp4lxmxv5 +real_esrgan_x4plus_QNN-cs_auto_makena_8295: jpxk73715 +real_esrgan_x4plus_QNN-cs_xr_8450: j5mnwowwp +real_esrgan_x4plus_ONNX: jgn69o9r5 +real_esrgan_x4plus_ONNX_FP16: jprv4o49g +real_esrgan_general_x4v3_TFLITE: jp2k7474p +real_esrgan_general_x4v3_QNN: jpy14q47p +real_esrgan_general_x4v3_QNN-cs_8_elite: jp0z1d165 +real_esrgan_general_x4v3_QNN-cs_8550: jp8q363xp +real_esrgan_general_x4v3_QNN-cs_x_elite: jgkelol2g +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8255: j5q67z74p +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8775: jglv0o085 +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8650: j56y3r30p +real_esrgan_general_x4v3_QNN-cs_auto_makena_8295: jp3j4x4lg +real_esrgan_general_x4v3_QNN-cs_xr_8450: jgo21o1xp +real_esrgan_general_x4v3_ONNX: jpv61e1j5 +real_esrgan_general_x4v3_ONNX_FP16: jgjv0o0xg +quicksrnetsmall_TFLITE: jpedr8r15 +quicksrnetsmall_QNN: jgz3x8xk5 +quicksrnetsmall_QNN-cs_8_elite: j5wed8m65 +quicksrnetsmall_QNN-cs_8550: jg9l3k9lg +quicksrnetsmall_QNN-cs_x_elite: jp14d7q2p +quicksrnetsmall_QNN-cs_auto_lemans_8255: jgdxr87ep +quicksrnetsmall_QNN-cs_auto_lemans_8775: j5wed8m35 
+quicksrnetsmall_QNN-cs_auto_lemans_8650: jg9l3k9wg +quicksrnetsmall_QNN-cs_auto_makena_8295: jp14d7q8p +quicksrnetsmall_QNN-cs_xr_8450: jgdxr87rp +quicksrnetsmall_ONNX: j57yjkvv5 +quicksrnetsmall_ONNX_FP16: jp4lxmj85 +quicksrnetmedium_TFLITE: jpxk73e35 +quicksrnetmedium_QNN: j5mnwovdp +quicksrnetmedium_QNN-cs_8_elite: jgn69ork5 +quicksrnetmedium_QNN-cs_8550: jprv4o10g +quicksrnetmedium_QNN-cs_x_elite: jp2k743rp +quicksrnetmedium_QNN-cs_auto_lemans_8255: jpy14qv8p +quicksrnetmedium_QNN-cs_auto_lemans_8775: jp0z1de95 +quicksrnetmedium_QNN-cs_auto_lemans_8650: jp8q36wkp +quicksrnetmedium_QNN-cs_auto_makena_8295: jgkelorwg +quicksrnetmedium_QNN-cs_xr_8450: j5q67z9np +quicksrnetmedium_ONNX: jglv0oej5 +quicksrnetmedium_ONNX_FP16: j56y3rq6p +quicksrnetlarge_TFLITE: jp3j4xq3g +quicksrnetlarge_QNN: jgo21oeqp +quicksrnetlarge_QNN-cs_8_elite: jpv61e4k5 +quicksrnetlarge_QNN-cs_8550: jgjv0o1vg +quicksrnetlarge_QNN-cs_x_elite: jpedr82o5 +quicksrnetlarge_QNN-cs_auto_lemans_8255: jgz3x8wo5 +quicksrnetlarge_QNN-cs_auto_lemans_8775: j5wed8x35 +quicksrnetlarge_QNN-cs_auto_lemans_8650: jg9l3k8wg +quicksrnetlarge_QNN-cs_auto_makena_8295: jp14d738p +quicksrnetlarge_QNN-cs_xr_8450: jgdxr80rp +quicksrnetlarge_ONNX: j57yjk6v5 +quicksrnetlarge_ONNX_FP16: jp4lxm885 +posenet_mobilenet_quantized_TFLITE: j5mnwo4dp +posenet_mobilenet_quantized_QNN: jgn69oxk5 +posenet_mobilenet_quantized_QNN-cs_8_elite: jprv4o90g +posenet_mobilenet_quantized_QNN-cs_6490: jp2k74jrp +posenet_mobilenet_quantized_QNN-cs_8550: jpy14qn8p +posenet_mobilenet_quantized_QNN-cs_x_elite: jp0z1dk95 +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8255: jp8q368kp +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8775: jgkelodwg +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8650: j5q67zwnp +posenet_mobilenet_quantized_QNN-cs_auto_makena_8295: jglv0o7j5 +posenet_mobilenet_quantized_QNN-cs_xr_8450: j56y3rv6p +posenet_mobilenet_quantized_ONNX: jp3j4x83g +posenet_mobilenet_TFLITE: jgo21omqp +posenet_mobilenet_QNN: jpv61eek5 
+posenet_mobilenet_QNN-cs_8_elite: jgjv0oovg +posenet_mobilenet_QNN-cs_8550: jpedr88o5 +posenet_mobilenet_QNN-cs_x_elite: jgz3x88o5 +posenet_mobilenet_QNN-cs_auto_lemans_8255: j5wed8835 +posenet_mobilenet_QNN-cs_auto_lemans_8775: jg9l3kkwg +posenet_mobilenet_QNN-cs_auto_lemans_8650: jp14d778p +posenet_mobilenet_QNN-cs_auto_makena_8295: jgdxr88rp +posenet_mobilenet_QNN-cs_xr_8450: j57yjkkv5 +posenet_mobilenet_ONNX: jp4lxmm85 +posenet_mobilenet_ONNX_FP16: jpxk73335 +openpose_TFLITE: j5mnwoodp +openpose_QNN: jgn69ook5 +openpose_QNN-cs_8_elite: jprv4oo0g +openpose_QNN-cs_8550: jp2k744rp +openpose_QNN-cs_x_elite: jpy14qq8p +openpose_QNN-cs_auto_lemans_8255: jp0z1dd95 +openpose_QNN-cs_auto_lemans_8775: jp8q366kp +openpose_QNN-cs_auto_lemans_8650: jgkeloowg +openpose_QNN-cs_auto_makena_8295: j5q67zznp +openpose_QNN-cs_xr_8450: jglv0ooj5 +openpose_ONNX: j56y3rr6p +openpose_ONNX_FP16: jp3j4xx3g +openai_clip_TFLITE_CLIPTextEncoder: jgo21ooqp +openai_clip_TFLITE_CLIPImageEncoder: jpv61e9k5 +openai_clip_QNN_CLIPTextEncoder: jgjv0owvg +openai_clip_QNN_CLIPImageEncoder: jpedr8lo5 +openai_clip_QNN-cs_8_elite_CLIPTextEncoder: jgz3x84o5 +openai_clip_QNN-cs_8_elite_CLIPImageEncoder: j5wed8135 +openai_clip_QNN-cs_8550_CLIPTextEncoder: jg9l3kxwg +openai_clip_QNN-cs_8550_CLIPImageEncoder: jp14d7v8p +openai_clip_QNN-cs_x_elite_CLIPTextEncoder: jgdxr8zrp +openai_clip_QNN-cs_x_elite_CLIPImageEncoder: j57yjk7v5 +openai_clip_QNN-cs_auto_lemans_8255_CLIPTextEncoder: jp4lxm985 +openai_clip_QNN-cs_auto_lemans_8255_CLIPImageEncoder: jpxk73d35 +openai_clip_QNN-cs_auto_lemans_8775_CLIPTextEncoder: j5mnwoddp +openai_clip_QNN-cs_auto_lemans_8775_CLIPImageEncoder: jgn69o7k5 +openai_clip_QNN-cs_auto_lemans_8650_CLIPTextEncoder: jprv4on0g +openai_clip_QNN-cs_auto_lemans_8650_CLIPImageEncoder: jp2k74vrp +openai_clip_QNN-cs_auto_makena_8295_CLIPTextEncoder: jpy14q78p +openai_clip_QNN-cs_auto_makena_8295_CLIPImageEncoder: jp0z1dv95 +openai_clip_QNN-cs_xr_8450_CLIPTextEncoder: jp8q364kp 
+openai_clip_QNN-cs_xr_8450_CLIPImageEncoder: jgkelo9wg +openai_clip_ONNX_CLIPTextEncoder: j5q67zmnp +openai_clip_ONNX_CLIPImageEncoder: jglv0o1j5 +openai_clip_ONNX_FP16_CLIPTextEncoder: j56y3rd6p +openai_clip_ONNX_FP16_CLIPImageEncoder: jp3j4xw3g +mobilenet_v3_small_TFLITE: jgo21o4qp +mobilenet_v3_small_QNN: jpv61e7k5 +mobilenet_v3_small_QNN-cs_8_elite: jgjv0oqvg +mobilenet_v3_small_QNN-cs_8550: jpedr8yo5 +mobilenet_v3_small_QNN-cs_x_elite: jgz3x8no5 +mobilenet_v3_small_QNN-cs_auto_lemans_8255: j5wed8435 +mobilenet_v3_small_QNN-cs_auto_lemans_8775: jg9l3kdwg +mobilenet_v3_small_QNN-cs_auto_lemans_8650: jp14d768p +mobilenet_v3_small_QNN-cs_auto_makena_8295: jgdxr82rp +mobilenet_v3_small_QNN-cs_xr_8450: j5wed84m5 +mobilenet_v3_small_ONNX: jg9l3kd8g +mobilenet_v3_small_ONNX_FP16: jp14d767p +mobilenet_v3_large_TFLITE: jgdxr82zp +mobilenet_v3_large_QNN: j57yjk995 +mobilenet_v3_large_QNN-cs_8_elite: jp4lxm315 +mobilenet_v3_large_QNN-cs_8550: jpxk73xl5 +mobilenet_v3_large_QNN-cs_x_elite: j5mnwo89p +mobilenet_v3_large_QNN-cs_auto_lemans_8255: jgn69okq5 +mobilenet_v3_large_QNN-cs_auto_lemans_8775: jprv4ow7g +mobilenet_v3_large_QNN-cs_auto_lemans_8650: jp2k74eqp +mobilenet_v3_large_QNN-cs_auto_makena_8295: jpy14qmlp +mobilenet_v3_large_QNN-cs_xr_8450: jp0z1d6n5 +mobilenet_v3_large_ONNX: jp8q361op +mobilenet_v3_large_ONNX_FP16: jgkelo8ng +mobilenet_v2_TFLITE: j5q67zvop +mobilenet_v2_QNN: jglv0oxm5 +mobilenet_v2_QNN-cs_8_elite: j56y3r7yp +mobilenet_v2_QNN-cs_8550: jp3j4x9ng +mobilenet_v2_QNN-cs_x_elite: jgo21orkp +mobilenet_v2_QNN-cs_auto_lemans_8255: jpv61edr5 +mobilenet_v2_QNN-cs_auto_lemans_8775: jgjv0o7eg +mobilenet_v2_QNN-cs_auto_lemans_8650: jpedr8zv5 +mobilenet_v2_QNN-cs_auto_makena_8295: jgz3x8mx5 +mobilenet_v2_QNN-cs_xr_8450: j5wed87m5 +mobilenet_v2_ONNX: jg9l3km8g +mobilenet_v2_ONNX_FP16: jp14d7j7p +mnasnet05_TFLITE: jgdxr83zp +mnasnet05_QNN: j57yjk495 +mnasnet05_QNN-cs_8_elite: jp4lxm115 +mnasnet05_QNN-cs_8550: jpxk734l5 +mnasnet05_QNN-cs_x_elite: j5mnwom9p 
+mnasnet05_QNN-cs_auto_lemans_8255: jgn69onq5 +mnasnet05_QNN-cs_auto_lemans_8775: jprv4o07g +mnasnet05_QNN-cs_auto_lemans_8650: jp2k74wqp +mnasnet05_QNN-cs_auto_makena_8295: jpy14qxlp +mnasnet05_QNN-cs_xr_8450: jp0z1djn5 +mnasnet05_ONNX: jp8q36xop +mnasnet05_ONNX_FP16: jgkelo4ng +midas_quantized_TFLITE: jglv0oym5 +midas_quantized_QNN: j56y3r8yp +midas_quantized_QNN-cs_8_elite: jp3j4xzng +midas_quantized_QNN-cs_6490: jgo21olkp +midas_quantized_QNN-cs_8550: jpv61elr5 +midas_quantized_QNN-cs_x_elite: jgjv0oreg +midas_quantized_QNN-cs_auto_lemans_8255: jpedr87v5 +midas_quantized_QNN-cs_auto_lemans_8775: jgz3x8lx5 +midas_quantized_QNN-cs_auto_lemans_8650: j5wed8lm5 +midas_quantized_QNN-cs_auto_makena_8295: jg9l3kz8g +midas_quantized_QNN-cs_xr_8450: jp14d7n7p +midas_quantized_ONNX: jgdxr8dzp +midas_TFLITE: j57yjke95 +midas_QNN: jp4lxmy15 +midas_QNN-cs_8_elite: jpxk73ll5 +midas_QNN-cs_8550: j5mnwo09p +midas_QNN-cs_x_elite: jgn69ozq5 +midas_QNN-cs_auto_lemans_8255: jprv4ol7g +midas_QNN-cs_auto_lemans_8775: jp2k74rqp +midas_QNN-cs_auto_lemans_8650: jpy14qolp +midas_QNN-cs_auto_makena_8295: jp0z1dmn5 +midas_QNN-cs_xr_8450: jp8q36eop +midas_ONNX: jgkelo2ng +midas_ONNX_FP16: j5q67zlop +mediapipe_selfie_TFLITE: jglv0owm5 +mediapipe_selfie_QNN: j56y3royp +mediapipe_selfie_QNN-cs_8_elite: jp3j4xong +mediapipe_selfie_QNN-cs_8550: jgo21odkp +mediapipe_selfie_QNN-cs_x_elite: jpv61e2r5 +mediapipe_selfie_QNN-cs_auto_lemans_8255: jgjv0o3eg +mediapipe_selfie_QNN-cs_auto_lemans_8775: jpedr86v5 +mediapipe_selfie_QNN-cs_auto_lemans_8650: jgz3x8zx5 +mediapipe_selfie_QNN-cs_auto_makena_8295: jg9l3ko8g +mediapipe_selfie_QNN-cs_xr_8450: jp14d7o7p +mediapipe_selfie_ONNX: jgdxr86zp +mediapipe_selfie_ONNX_FP16: j57yjko95 +mediapipe_pose_TFLITE_MediaPipePoseDetector: jp4lxme15 +mediapipe_pose_TFLITE_MediaPipePoseLandmarkDetector: jpxk730l5 +mediapipe_pose_QNN_MediaPipePoseDetector: j5mnwo99p +mediapipe_pose_QNN_MediaPipePoseLandmarkDetector: jgn69o1q5 
+mediapipe_pose_QNN-cs_8_elite_MediaPipePoseDetector: jprv4ox7g +mediapipe_pose_QNN-cs_8_elite_MediaPipePoseLandmarkDetector: jp2k74oqp +mediapipe_pose_QNN-cs_8550_MediaPipePoseDetector: jpy14q8lp +mediapipe_pose_QNN-cs_8550_MediaPipePoseLandmarkDetector: jp0z1don5 +mediapipe_pose_QNN-cs_x_elite_MediaPipePoseDetector: jp8q36jop +mediapipe_pose_QNN-cs_x_elite_MediaPipePoseLandmarkDetector: jgkelo6ng +mediapipe_pose_QNN-cs_auto_lemans_8255_MediaPipePoseDetector: j5q67z4op +mediapipe_pose_QNN-cs_auto_lemans_8255_MediaPipePoseLandmarkDetector: jglv0o8m5 +mediapipe_pose_QNN-cs_auto_lemans_8775_MediaPipePoseDetector: j56y3rmyp +mediapipe_pose_QNN-cs_auto_lemans_8775_MediaPipePoseLandmarkDetector: jp3j4x7ng +mediapipe_pose_QNN-cs_auto_lemans_8650_MediaPipePoseDetector: jgo21owkp +mediapipe_pose_QNN-cs_auto_lemans_8650_MediaPipePoseLandmarkDetector: jpv61emr5 +mediapipe_pose_QNN-cs_auto_makena_8295_MediaPipePoseDetector: jgjv0oyeg +mediapipe_pose_QNN-cs_auto_makena_8295_MediaPipePoseLandmarkDetector: jpedr8xv5 +mediapipe_pose_QNN-cs_xr_8450_MediaPipePoseDetector: jgz3x8yx5 +mediapipe_pose_QNN-cs_xr_8450_MediaPipePoseLandmarkDetector: j5wed8zm5 +mediapipe_pose_ONNX_MediaPipePoseDetector: jg9l3k28g +mediapipe_pose_ONNX_MediaPipePoseLandmarkDetector: jp14d717p +mediapipe_pose_ONNX_FP16_MediaPipePoseDetector: jgdxr84zp +mediapipe_pose_ONNX_FP16_MediaPipePoseLandmarkDetector: j5wed8z45 +mediapipe_hand_TFLITE_MediaPipeHandDetector: jg9l3k2mg +mediapipe_hand_TFLITE_MediaPipeHandLandmarkDetector: jp14d71np +mediapipe_hand_QNN_MediaPipeHandDetector: jgdxr846p +mediapipe_hand_QNN_MediaPipeHandLandmarkDetector: j57yjknn5 +mediapipe_hand_QNN-cs_8_elite_MediaPipeHandDetector: jp4lxm425 +mediapipe_hand_QNN-cs_8_elite_MediaPipeHandLandmarkDetector: jpxk73r85 +mediapipe_hand_QNN-cs_8550_MediaPipeHandDetector: j5mnwok7p +mediapipe_hand_QNN-cs_8550_MediaPipeHandLandmarkDetector: jgn69oqj5 +mediapipe_hand_QNN-cs_x_elite_MediaPipeHandDetector: jprv4odkg 
+mediapipe_hand_QNN-cs_x_elite_MediaPipeHandLandmarkDetector: jp2k74d6p +mediapipe_hand_QNN-cs_auto_lemans_8255_MediaPipeHandDetector: jpy14q20p +mediapipe_hand_QNN-cs_auto_lemans_8255_MediaPipeHandLandmarkDetector: jp0z1dx05 +mediapipe_hand_QNN-cs_auto_lemans_8775_MediaPipeHandDetector: jp8q36kqp +mediapipe_hand_QNN-cs_auto_lemans_8775_MediaPipeHandLandmarkDetector: jgkelokvg +mediapipe_hand_QNN-cs_auto_lemans_8650_MediaPipeHandDetector: j5q67zdep +mediapipe_hand_QNN-cs_auto_lemans_8650_MediaPipeHandLandmarkDetector: jglv0oq25 +mediapipe_hand_QNN-cs_auto_makena_8295_MediaPipeHandDetector: j56y3r0np +mediapipe_hand_QNN-cs_auto_makena_8295_MediaPipeHandLandmarkDetector: jp3j4xrmg +mediapipe_hand_QNN-cs_xr_8450_MediaPipeHandDetector: jgo21o91p +mediapipe_hand_QNN-cs_xr_8450_MediaPipeHandLandmarkDetector: jpv61enz5 +mediapipe_hand_ONNX_MediaPipeHandDetector: jgjv0o81g +mediapipe_hand_ONNX_MediaPipeHandLandmarkDetector: jpedr8n85 +mediapipe_hand_ONNX_FP16_MediaPipeHandDetector: jgz3x8045 +mediapipe_hand_ONNX_FP16_MediaPipeHandLandmarkDetector: j5wed8r45 +mediapipe_face_quantized_TFLITE_MediaPipeFaceDetector: jg9l3kqmg +mediapipe_face_quantized_TFLITE_MediaPipeFaceLandmarkDetector: jp14d7mnp +mediapipe_face_quantized_QNN_MediaPipeFaceDetector: jgdxr8m6p +mediapipe_face_quantized_QNN_MediaPipeFaceLandmarkDetector: j57yjk8n5 +mediapipe_face_quantized_QNN-cs_8_elite_MediaPipeFaceDetector: jp4lxm225 +mediapipe_face_quantized_QNN-cs_8_elite_MediaPipeFaceLandmarkDetector: jpxk73z85 +mediapipe_face_quantized_QNN-cs_6490_MediaPipeFaceDetector: j5mnwol7p +mediapipe_face_quantized_QNN-cs_6490_MediaPipeFaceLandmarkDetector: jgn69owj5 +mediapipe_face_quantized_QNN-cs_8550_MediaPipeFaceDetector: jprv4o7kg +mediapipe_face_quantized_QNN-cs_8550_MediaPipeFaceLandmarkDetector: jp2k74z6p +mediapipe_face_quantized_QNN-cs_x_elite_MediaPipeFaceDetector: jpy14qy0p +mediapipe_face_quantized_QNN-cs_x_elite_MediaPipeFaceLandmarkDetector: jp0z1d805 
+mediapipe_face_quantized_QNN-cs_auto_lemans_8255_MediaPipeFaceDetector: jp8q36dqp +mediapipe_face_quantized_QNN-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jgkelowvg +mediapipe_face_quantized_QNN-cs_auto_lemans_8775_MediaPipeFaceDetector: j5q67zxep +mediapipe_face_quantized_QNN-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jglv0o925 +mediapipe_face_quantized_QNN-cs_auto_lemans_8650_MediaPipeFaceDetector: j56y3r9np +mediapipe_face_quantized_QNN-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jp3j4xlmg +mediapipe_face_quantized_QNN-cs_auto_makena_8295_MediaPipeFaceDetector: jgo21o71p +mediapipe_face_quantized_QNN-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: jpv61eyz5 +mediapipe_face_quantized_QNN-cs_xr_8450_MediaPipeFaceDetector: jgjv0o61g +mediapipe_face_quantized_QNN-cs_xr_8450_MediaPipeFaceLandmarkDetector: jpedr8085 +mediapipe_face_quantized_ONNX_MediaPipeFaceDetector: jgz3x8q45 +mediapipe_face_quantized_ONNX_MediaPipeFaceLandmarkDetector: j5wed8045 +mediapipe_face_TFLITE_MediaPipeFaceDetector: jg9l3k7mg +mediapipe_face_TFLITE_MediaPipeFaceLandmarkDetector: jp14d7knp +mediapipe_face_QNN_MediaPipeFaceDetector: jgdxr8y6p +mediapipe_face_QNN_MediaPipeFaceLandmarkDetector: j57yjk1n5 +mediapipe_face_QNN-cs_8_elite_MediaPipeFaceDetector: jp4lxm625 +mediapipe_face_QNN-cs_8_elite_MediaPipeFaceLandmarkDetector: jpxk73885 +mediapipe_face_QNN-cs_8550_MediaPipeFaceDetector: j5mnwo17p +mediapipe_face_QNN-cs_8550_MediaPipeFaceLandmarkDetector: jgn69odj5 +mediapipe_face_QNN-cs_x_elite_MediaPipeFaceDetector: jprv4omkg +mediapipe_face_QNN-cs_x_elite_MediaPipeFaceLandmarkDetector: jp2k74q6p +mediapipe_face_QNN-cs_auto_lemans_8255_MediaPipeFaceDetector: jpy14qk0p +mediapipe_face_QNN-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jp0z1dw05 +mediapipe_face_QNN-cs_auto_lemans_8775_MediaPipeFaceDetector: jp8q36nqp +mediapipe_face_QNN-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jgkelo1vg +mediapipe_face_QNN-cs_auto_lemans_8650_MediaPipeFaceDetector: 
j5q67znep +mediapipe_face_QNN-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jglv0od25 +mediapipe_face_QNN-cs_auto_makena_8295_MediaPipeFaceDetector: j56y3rxnp +mediapipe_face_QNN-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: jp3j4xdmg +mediapipe_face_QNN-cs_xr_8450_MediaPipeFaceDetector: jgo21ox1p +mediapipe_face_QNN-cs_xr_8450_MediaPipeFaceLandmarkDetector: jpv61e8z5 +mediapipe_face_ONNX_MediaPipeFaceDetector: jgjv0o91g +mediapipe_face_ONNX_MediaPipeFaceLandmarkDetector: jpedr8q85 +mediapipe_face_ONNX_FP16_MediaPipeFaceDetector: jgz3x8645 +mediapipe_face_ONNX_FP16_MediaPipeFaceLandmarkDetector: j5wed8k45 +litehrnet_TFLITE: j57yjkmn5 +litehrnet_QNN: jp4lxm725 +litehrnet_QNN-cs_8_elite: jpxk73q85 +litehrnet_QNN-cs_8550: j5mnwo77p +litehrnet_QNN-cs_x_elite: jgn69o4j5 +litehrnet_QNN-cs_auto_lemans_8255: jprv4orkg +litehrnet_QNN-cs_auto_lemans_8775: jp2k7416p +litehrnet_QNN-cs_auto_lemans_8650: jpy14ql0p +litehrnet_QNN-cs_auto_makena_8295: jp0z1dn05 +litehrnet_QNN-cs_xr_8450: jp8q36lqp +litehrnet_ONNX: jgkelojvg +litehrnet_ONNX_FP16: j5q67zjep +lama_dilated_TFLITE: jglv0oj25 +lama_dilated_QNN: j56y3rknp +lama_dilated_QNN-cs_8_elite: jp3j4xymg +lama_dilated_QNN-cs_8550: jgo21oj1p +lama_dilated_QNN-cs_x_elite: jpv61ejz5 +lama_dilated_QNN-cs_auto_lemans_8255: jgjv0oj1g +lama_dilated_QNN-cs_auto_lemans_8775: jpedr8j85 +lama_dilated_QNN-cs_auto_lemans_8650: jgz3x8145 +lama_dilated_QNN-cs_auto_makena_8295: j5wed8j45 +lama_dilated_QNN-cs_xr_8450: jg9l3k6mg +lama_dilated_ONNX: jp14d7rnp +lama_dilated_ONNX_FP16: jgdxr8j6p +inception_v3_TFLITE: j5wed8jz5 +inception_v3_QNN: jg9l3k6qg +inception_v3_QNN-cs_8_elite: jp14d7rkp +inception_v3_QNN-cs_8550: jgdxr8jkp +inception_v3_QNN-cs_x_elite: j57yjkqq5 +inception_v3_QNN-cs_auto_lemans_8255: jp4lxmzq5 +inception_v3_QNN-cs_auto_lemans_8775: jpxk73wj5 +inception_v3_QNN-cs_auto_lemans_8650: j5mnwojyp +inception_v3_QNN-cs_auto_makena_8295: jgn69o2v5 +inception_v3_QNN-cs_xr_8450: jprv4okvg +inception_v3_ONNX: jp2k748xp 
+inception_v3_ONNX_FP16: jpy14qerp +huggingface_wavlm_base_plus_TFLITE: jp0z1dy25 +huggingface_wavlm_base_plus_QNN: jp8q36ozp +huggingface_wavlm_base_plus_QNN-cs_8_elite: jgkelozyg +huggingface_wavlm_base_plus_QNN-cs_8550: j5q67z87p +huggingface_wavlm_base_plus_QNN-cs_x_elite: jglv0one5 +huggingface_wavlm_base_plus_QNN-cs_auto_lemans_8255: j56y3r6vp +huggingface_wavlm_base_plus_QNN-cs_auto_lemans_8775: jp3j4xkxg +huggingface_wavlm_base_plus_QNN-cs_auto_lemans_8650: jgo21oy4p +huggingface_wavlm_base_plus_QNN-cs_auto_makena_8295: jpv61e375 +huggingface_wavlm_base_plus_QNN-cs_xr_8450: jgjv0ox7g +huggingface_wavlm_base_plus_ONNX: jpedr8975 +huggingface_wavlm_base_plus_ONNX_FP16: jgz3x8ez5 +hrnet_pose_quantized_TFLITE: jg9l3kvqg +hrnet_pose_quantized_QNN: jp14d70kp +hrnet_pose_quantized_QNN-cs_8_elite: jgdxr8wkp +hrnet_pose_quantized_QNN-cs_6490: j57yjkzq5 +hrnet_pose_quantized_QNN-cs_8550: jp4lxmqq5 +hrnet_pose_quantized_QNN-cs_x_elite: jpxk73vj5 +hrnet_pose_quantized_QNN-cs_auto_lemans_8255: j5mnworyp +hrnet_pose_quantized_QNN-cs_auto_lemans_8775: jgn69oyv5 +hrnet_pose_quantized_QNN-cs_auto_lemans_8650: jprv4oqvg +hrnet_pose_quantized_QNN-cs_auto_makena_8295: jp2k746xp +hrnet_pose_quantized_QNN-cs_xr_8450: jpy14qwrp +hrnet_pose_quantized_ONNX: jp0z1dq25 +hrnet_pose_TFLITE: jp8q369zp +hrnet_pose_QNN: jgkelonyg +hrnet_pose_QNN-cs_8_elite: j5q67zk7p +hrnet_pose_QNN-cs_8550: jglv0oze5 +hrnet_pose_QNN-cs_x_elite: j56y3rjvp +hrnet_pose_QNN-cs_auto_lemans_8255: jp3j4x3xg +hrnet_pose_QNN-cs_auto_lemans_8775: jgo21o04p +hrnet_pose_QNN-cs_auto_lemans_8650: jpv61eo75 +hrnet_pose_QNN-cs_auto_makena_8295: jgjv0om7g +hrnet_pose_QNN-cs_xr_8450: jpedr8175 +hrnet_pose_ONNX: jgz3x89z5 +hrnet_pose_ONNX_FP16: j5wed8vz5 +googlenet_TFLITE: jg9l3k1qg +googlenet_QNN: jp14d7lkp +googlenet_QNN-cs_8_elite: jgdxr89kp +googlenet_QNN-cs_8550: j57yjkwq5 +googlenet_QNN-cs_x_elite: jp4lxmoq5 +googlenet_QNN-cs_auto_lemans_8255: jpxk73jj5 +googlenet_QNN-cs_auto_lemans_8775: j5mnwo2yp 
+googlenet_QNN-cs_auto_lemans_8650: jgn69o8v5 +googlenet_QNN-cs_auto_makena_8295: jprv4ojvg +googlenet_QNN-cs_xr_8450: jp2k74nxp +googlenet_ONNX: jpy14q0rp +googlenet_ONNX_FP16: jp0z1d725 +gear_guard_net_quantized_TFLITE: jp8q36vzp +gear_guard_net_quantized_QNN: jgkelomyg +gear_guard_net_quantized_QNN-cs_8_elite: j5q67zo7p +gear_guard_net_quantized_QNN-cs_6490: jglv0ore5 +gear_guard_net_quantized_QNN-cs_8550: j56y3rlvp +gear_guard_net_quantized_QNN-cs_x_elite: jp3j4x2xg +gear_guard_net_quantized_QNN-cs_auto_lemans_8255: jgo21oq4p +gear_guard_net_quantized_QNN-cs_auto_lemans_8775: jpv61ex75 +gear_guard_net_quantized_QNN-cs_auto_lemans_8650: jgjv0o47g +gear_guard_net_quantized_QNN-cs_auto_makena_8295: jpedr8375 +gear_guard_net_quantized_QNN-cs_xr_8450: jgz3x8kz5 +gear_guard_net_quantized_ONNX: j5wed8nz5 +gear_guard_net_TFLITE: jg9l3keqg +gear_guard_net_QNN: jp14d7xkp +gear_guard_net_QNN-cs_8_elite: jgdxr8lkp +gear_guard_net_QNN-cs_8550: j57yjk3q5 +gear_guard_net_QNN-cs_x_elite: jp4lxm0q5 +gear_guard_net_QNN-cs_auto_lemans_8255: jpxk732j5 +gear_guard_net_QNN-cs_auto_lemans_8775: j5mnwoyyp +gear_guard_net_QNN-cs_auto_lemans_8650: jgn697vv5 +gear_guard_net_QNN-cs_auto_makena_8295: jprv4n3vg +gear_guard_net_QNN-cs_xr_8450: jp2k7vyxp +gear_guard_net_ONNX: jpy1473rp +gear_guard_net_ONNX_FP16: jp0z1v025 +foot_track_net_quantized_TFLITE: jp8q34yzp +foot_track_net_quantized_QNN: jgkel9xyg +foot_track_net_quantized_QNN-cs_8_elite: j5q67mq7p +foot_track_net_quantized_QNN-cs_6490: jglv01me5 +foot_track_net_quantized_QNN-cs_8550: j56y3d4vp +foot_track_net_quantized_QNN-cs_x_elite: jp3j4w0xg +foot_track_net_quantized_QNN-cs_auto_lemans_8255: jgo21464p +foot_track_net_quantized_QNN-cs_auto_lemans_8775: jpv619k75 +foot_track_net_quantized_QNN-cs_auto_lemans_8650: jgjv0wn7g +foot_track_net_quantized_QNN-cs_auto_makena_8295: jpedrlm75 +foot_track_net_quantized_QNN-cs_xr_8450: jgz3x4dz5 +foot_track_net_quantized_ONNX: j5wed16z5 +foot_track_net_TFLITE: jg9l3xnqg +foot_track_net_QNN: 
jp14dvzkp +foot_track_net_QNN-cs_8_elite: jgdxrz1kp +foot_track_net_QNN-cs_8550: j5wed16j5 +foot_track_net_QNN-cs_x_elite: jg9l3xnvg +foot_track_net_QNN-cs_auto_lemans_8255: jp14dvzlp +foot_track_net_QNN-cs_auto_lemans_8775: jgdxrz1lp +foot_track_net_QNN-cs_auto_lemans_8650: j57yj7yr5 +foot_track_net_QNN-cs_auto_makena_8295: jp4lx9ll5 +foot_track_net_QNN-cs_xr_8450: jpxk7dk95 +foot_track_net_ONNX: j5mnwdnqp +foot_track_net_ONNX_FP16: jgn6976m5 +ffnet_78s_quantized_TFLITE: jp2k7vkmp +ffnet_78s_quantized_QNN: jpy14714p +ffnet_78s_quantized_QNN-cs_8_elite: jp0z1vze5 +ffnet_78s_quantized_QNN-cs_6490: jp8q34q8p +ffnet_78s_quantized_QNN-cs_8550: jgkel9eog +ffnet_78s_quantized_QNN-cs_x_elite: j5q67m6mp +ffnet_78s_quantized_QNN-cs_auto_lemans_8255: jglv01vl5 +ffnet_78s_quantized_QNN-cs_auto_lemans_8775: j56y3dy7p +ffnet_78s_quantized_QNN-cs_auto_lemans_8650: jp3j4wjzg +ffnet_78s_quantized_QNN-cs_auto_makena_8295: jgo2142dp +ffnet_78s_quantized_QNN-cs_xr_8450: jpv6196m5 +ffnet_78s_quantized_ONNX: jgjv0wv8g +ffnet_78s_lowres_TFLITE: jpedrld05 +ffnet_78s_lowres_QNN: jgz3x4365 +ffnet_78s_lowres_QNN-cs_8_elite: j5wed1ej5 +ffnet_78s_lowres_QNN-cs_8550: jg9l3xlvg +ffnet_78s_lowres_QNN-cs_x_elite: jp14dv4lp +ffnet_78s_lowres_QNN-cs_auto_lemans_8255: jgdxrzxlp +ffnet_78s_lowres_QNN-cs_auto_lemans_8775: j57yj72r5 +ffnet_78s_lowres_QNN-cs_auto_lemans_8650: jp4lx9nl5 +ffnet_78s_lowres_QNN-cs_auto_makena_8295: jpxk7d995 +ffnet_78s_lowres_QNN-cs_xr_8450: j5mnwdeqp +ffnet_78s_lowres_ONNX: jgn6970m5 +ffnet_78s_lowres_ONNX_FP16: jprv4n6eg +ffnet_78s_TFLITE: jp2k7vxmp +ffnet_78s_QNN: jpy147z4p +ffnet_78s_QNN-cs_8_elite: jp0z1v4e5 +ffnet_78s_QNN-cs_8550: jp8q3428p +ffnet_78s_QNN-cs_x_elite: jgkel9vog +ffnet_78s_QNN-cs_auto_lemans_8255: j5q67m0mp +ffnet_78s_QNN-cs_auto_lemans_8775: jglv014l5 +ffnet_78s_QNN-cs_auto_lemans_8650: j56y3d27p +ffnet_78s_QNN-cs_auto_makena_8295: jp3j4wnzg +ffnet_78s_QNN-cs_xr_8450: jgo214zdp +ffnet_78s_ONNX: jpv619qm5 +ffnet_78s_ONNX_FP16: jgjv0wd8g 
+ffnet_54s_quantized_TFLITE: jpedrlo05 +ffnet_54s_quantized_QNN: jgz3x4265 +ffnet_54s_quantized_QNN-cs_8_elite: j5wed1wj5 +ffnet_54s_quantized_QNN-cs_6490: jg9l3x0vg +ffnet_54s_quantized_QNN-cs_8550: jp14dv2lp +ffnet_54s_quantized_QNN-cs_x_elite: jgdxrznlp +ffnet_54s_quantized_QNN-cs_auto_lemans_8255: j57yj70r5 +ffnet_54s_quantized_QNN-cs_auto_lemans_8775: jp4lx9kl5 +ffnet_54s_quantized_QNN-cs_auto_lemans_8650: jpxk7dn95 +ffnet_54s_quantized_QNN-cs_auto_makena_8295: j5mnwdqqp +ffnet_54s_quantized_QNN-cs_xr_8450: jgn697lm5 +ffnet_54s_quantized_ONNX: jprv4n8eg +ffnet_54s_TFLITE: jp2k7v0mp +ffnet_54s_QNN: jpy147r4p +ffnet_54s_QNN-cs_8_elite: jp0z1v3e5 +ffnet_54s_QNN-cs_8550: jp8q3408p +ffnet_54s_QNN-cs_x_elite: jgkel97og +ffnet_54s_QNN-cs_auto_lemans_8255: j5q67memp +ffnet_54s_QNN-cs_auto_lemans_8775: jglv016l5 +ffnet_54s_QNN-cs_auto_lemans_8650: j56y3de7p +ffnet_54s_QNN-cs_auto_makena_8295: jp3j4wvzg +ffnet_54s_QNN-cs_xr_8450: jgo214kdp +ffnet_54s_ONNX: jpv6190m5 +ffnet_54s_ONNX_FP16: jgjv0wz8g +ffnet_40s_quantized_TFLITE: jpedrle05 +ffnet_40s_quantized_QNN: jgz3x4o65 +ffnet_40s_quantized_QNN-cs_8_elite: j5wed12j5 +ffnet_40s_quantized_QNN-cs_6490: jg9l3xjvg +ffnet_40s_quantized_QNN-cs_8550: jp14dvylp +ffnet_40s_quantized_QNN-cs_x_elite: jgdxrzelp +ffnet_40s_quantized_QNN-cs_auto_lemans_8255: j57yj7lr5 +ffnet_40s_quantized_QNN-cs_auto_lemans_8775: jp4lx9dl5 +ffnet_40s_quantized_QNN-cs_auto_lemans_8650: jpxk7d695 +ffnet_40s_quantized_QNN-cs_auto_makena_8295: j5mnwd6qp +ffnet_40s_quantized_QNN-cs_xr_8450: jgn697mm5 +ffnet_40s_quantized_ONNX: jprv4n2eg +ffnet_40s_TFLITE: jp2k7v9mp +ffnet_40s_QNN: jpy147j4p +ffnet_40s_QNN-cs_8_elite: jp0z1v2e5 +ffnet_40s_QNN-cs_8550: jp8q34m8p +ffnet_40s_QNN-cs_x_elite: jgkel9qog +ffnet_40s_QNN-cs_auto_lemans_8255: j5q67mrmp +ffnet_40s_QNN-cs_auto_lemans_8775: jglv012l5 +ffnet_40s_QNN-cs_auto_lemans_8650: j56y3dz7p +ffnet_40s_QNN-cs_auto_makena_8295: jp3j4w1zg +ffnet_40s_QNN-cs_xr_8450: jgo214ndp +ffnet_40s_ONNX: jpv619rm5 
+ffnet_40s_ONNX_FP16: jgjv0w28g +ffnet_122ns_lowres_TFLITE: jpedrlw05 +ffnet_122ns_lowres_QNN: jgz3x4j65 +ffnet_122ns_lowres_QNN-cs_8_elite: j5wed13j5 +ffnet_122ns_lowres_QNN-cs_8550: jg9l3xyvg +ffnet_122ns_lowres_QNN-cs_x_elite: jp14dvwlp +ffnet_122ns_lowres_QNN-cs_auto_lemans_8255: jgdxrzqlp +ffnet_122ns_lowres_QNN-cs_auto_lemans_8775: j5wed1q65 +ffnet_122ns_lowres_QNN-cs_auto_lemans_8650: jg9l3xwlg +ffnet_122ns_lowres_QNN-cs_auto_makena_8295: jp14dve2p +ffnet_122ns_lowres_QNN-cs_xr_8450: jgdxrzoep +ffnet_122ns_lowres_ONNX: j57yj7xl5 +ffnet_122ns_lowres_ONNX_FP16: jp4lx9vv5 +fcn_resnet50_TFLITE: jpxk7dy15 +fcn_resnet50_QNN: j5mnwd3wp +fcn_resnet50_QNN-cs_8_elite: jgn6973r5 +fcn_resnet50_QNN-cs_8550: jprv4ne9g +fcn_resnet50_QNN-cs_x_elite: jp2k7vl4p +fcn_resnet50_QNN-cs_auto_lemans_8255: jpy14767p +fcn_resnet50_QNN-cs_auto_lemans_8775: jp0z1vl65 +fcn_resnet50_QNN-cs_auto_lemans_8650: jp8q34zxp +fcn_resnet50_QNN-cs_auto_makena_8295: jgkel932g +fcn_resnet50_QNN-cs_xr_8450: j5q67m34p +fcn_resnet50_ONNX: jglv01385 +fcn_resnet50_ONNX_FP16: j56y3dn0p +fastsam_x_TFLITE: jgo2143xp +fastsam_x_QNN: jpv619vj5 +fastsam_x_QNN-cs_8_elite: jgjv0wexg +fastsam_x_QNN-cs_8550: jpedrlk15 +fastsam_x_QNN-cs_x_elite: jgz3x4rk5 +fastsam_x_QNN-cs_auto_lemans_8255: j5wed1965 +fastsam_x_QNN-cs_auto_lemans_8775: jg9l3x4lg +fastsam_x_QNN-cs_auto_lemans_8650: jp14dv82p +fastsam_x_QNN-cs_auto_makena_8295: jgdxrzvep +fastsam_x_QNN-cs_xr_8450: j57yj7dl5 +fastsam_x_ONNX: jp4lx9wv5 +fastsam_x_ONNX_FP16: jpxk7d115 +fastsam_s_TFLITE: j5mnwdzwp +fastsam_s_QNN: jgn697er5 +fastsam_s_QNN-cs_8_elite: jprv4ny9g +fastsam_s_QNN-cs_8550: jp2k7vm4p +fastsam_s_QNN-cs_x_elite: jpy147d7p +fastsam_s_QNN-cs_auto_lemans_8255: jp0z1vr65 +fastsam_s_QNN-cs_auto_lemans_8775: jp8q347xp +fastsam_s_QNN-cs_auto_lemans_8650: jgkel9y2g +fastsam_s_QNN-cs_auto_makena_8295: j5q67m24p +fastsam_s_QNN-cs_xr_8450: jglv01k85 +fastsam_s_ONNX: j56y3d10p +fastsam_s_ONNX_FP16: jp3j4wmlg +facemap_3dmm_TFLITE: jgo214vxp +facemap_3dmm_QNN: 
jpv619wj5 +facemap_3dmm_QNN-cs_8_elite: jgjv0wlxg +facemap_3dmm_QNN-cs_8550: jpedrlv15 +facemap_3dmm_QNN-cs_x_elite: jgz3x47k5 +facemap_3dmm_QNN-cs_auto_lemans_8255: j5wed1d65 +facemap_3dmm_QNN-cs_auto_lemans_8775: jg9l3x3lg +facemap_3dmm_QNN-cs_auto_lemans_8650: jp14dvd2p +facemap_3dmm_QNN-cs_auto_makena_8295: jgdxrzrep +facemap_3dmm_QNN-cs_xr_8450: j57yj7jl5 +facemap_3dmm_ONNX: jp4lx9xv5 +facemap_3dmm_ONNX_FP16: jpxk7d715 +face_det_lite_TFLITE: j5mnwdwwp +face_det_lite_QNN: jgn6979r5 +face_det_lite_QNN-cs_8_elite: jprv4n49g +face_det_lite_QNN-cs_8550: jp2k7v74p +face_det_lite_QNN-cs_x_elite: jpy14747p +face_det_lite_QNN-cs_auto_lemans_8255: jp0z1v165 +face_det_lite_QNN-cs_auto_lemans_8775: jp8q343xp +face_det_lite_QNN-cs_auto_lemans_8650: jgkel9l2g +face_det_lite_QNN-cs_auto_makena_8295: j5q67m74p +face_det_lite_QNN-cs_xr_8450: jglv01085 +face_det_lite_ONNX: j56y3d30p +face_det_lite_ONNX_FP16: jp3j4w4lg +face_body_net_TFLITE: jgo2141xp +face_body_net_QNN: jpv6191j5 +face_body_net_QNN-cs_8_elite: jgjv0w0xg +face_body_net_QNN-cs_8550: jpedrlr15 +face_body_net_QNN-cs_x_elite: jgz3x4xk5 +face_body_net_QNN-cs_auto_lemans_8255: j5wed1m65 +face_body_net_QNN-cs_auto_lemans_8775: jg9l3x9lg +face_body_net_QNN-cs_auto_lemans_8650: jp14dvq2p +face_body_net_QNN-cs_auto_makena_8295: jgdxrz7ep +face_body_net_QNN-cs_xr_8450: j57yj7vl5 +face_body_net_ONNX: jp4lx9jv5 +face_body_net_ONNX_FP16: jpxk7de15 +face_attrib_net_TFLITE: j5mnwdvwp +face_attrib_net_QNN: jgn697rr5 +face_attrib_net_QNN-cs_8_elite: jprv4n19g +face_attrib_net_QNN-cs_8550: jp2k7v34p +face_attrib_net_QNN-cs_x_elite: jpy147v7p +face_attrib_net_QNN-cs_auto_lemans_8255: jp0z1ve65 +face_attrib_net_QNN-cs_auto_lemans_8775: jp8q34wxp +face_attrib_net_QNN-cs_auto_lemans_8650: jgkel9r2g +face_attrib_net_QNN-cs_auto_makena_8295: j5q67m94p +face_attrib_net_QNN-cs_xr_8450: jglv01e85 +face_attrib_net_ONNX: j56y3dq0p +face_attrib_net_ONNX_FP16: jp3j4wqlg +esrgan_TFLITE: jgo214exp +esrgan_QNN: jpv619zj5 +esrgan_QNN-cs_8_elite: 
jgjv0wkxg +esrgan_QNN-cs_8550: jpedrl415 +esrgan_QNN-cs_x_elite: jgz3x4vk5 +esrgan_QNN-cs_auto_lemans_8255: j5wed1x65 +esrgan_QNN-cs_auto_lemans_8775: jg9l3x8lg +esrgan_QNN-cs_auto_lemans_8650: jp14dv32p +esrgan_QNN-cs_auto_makena_8295: jgdxrz0ep +esrgan_QNN-cs_xr_8450: j5wed1x35 +esrgan_ONNX: jg9l3x8wg +esrgan_ONNX_FP16: jp14dv38p +efficientvit_l2_cls_TFLITE: jgdxrz0rp +efficientvit_l2_cls_QNN: j57yj76v5 +efficientvit_l2_cls_QNN-cs_8_elite: jp4lx9885 +efficientvit_l2_cls_QNN-cs_8550: jpxk7dm35 +efficientvit_l2_cls_QNN-cs_x_elite: j5mnwd4dp +efficientvit_l2_cls_QNN-cs_auto_lemans_8255: jgn697xk5 +efficientvit_l2_cls_QNN-cs_auto_lemans_8775: jprv4n90g +efficientvit_l2_cls_QNN-cs_auto_lemans_8650: jp2k7vjrp +efficientvit_l2_cls_QNN-cs_auto_makena_8295: jpy147n8p +efficientvit_l2_cls_QNN-cs_xr_8450: jp0z1vk95 +efficientvit_l2_cls_ONNX: jp8q348kp +efficientvit_l2_cls_ONNX_FP16: jgkel9dwg +efficientvit_b2_cls_TFLITE: j5q67mwnp +efficientvit_b2_cls_QNN: jglv017j5 +efficientvit_b2_cls_QNN-cs_8_elite: j56y3dv6p +efficientvit_b2_cls_QNN-cs_8550: jp3j4w83g +efficientvit_b2_cls_QNN-cs_x_elite: jgo214mqp +efficientvit_b2_cls_QNN-cs_auto_lemans_8255: jpv619ek5 +efficientvit_b2_cls_QNN-cs_auto_lemans_8775: jgjv0wovg +efficientvit_b2_cls_QNN-cs_auto_lemans_8650: jpedrl8o5 +efficientvit_b2_cls_QNN-cs_auto_makena_8295: jgz3x48o5 +efficientvit_b2_cls_QNN-cs_xr_8450: j5wed1835 +efficientvit_b2_cls_ONNX: jg9l3xkwg +efficientvit_b2_cls_ONNX_FP16: jp14dv78p +efficientnet_b4_TFLITE: jgdxrz8rp +efficientnet_b4_QNN: j57yj7kv5 +efficientnet_b4_QNN-cs_8_elite: jp4lx9m85 +efficientnet_b4_QNN-cs_8550: jpxk7d335 +efficientnet_b4_QNN-cs_x_elite: j5mnwdodp +efficientnet_b4_QNN-cs_auto_lemans_8255: jgn697ok5 +efficientnet_b4_QNN-cs_auto_lemans_8775: jprv4no0g +efficientnet_b4_QNN-cs_auto_lemans_8650: jp2k7v4rp +efficientnet_b4_QNN-cs_auto_makena_8295: jpy147q8p +efficientnet_b4_QNN-cs_xr_8450: jp0z1vd95 +efficientnet_b4_ONNX: jp8q346kp +efficientnet_b4_ONNX_FP16: jgkel9owg +efficientnet_b0_TFLITE: 
j5q67mznp +efficientnet_b0_QNN: jglv01oj5 +efficientnet_b0_QNN-cs_8_elite: j56y3dr6p +efficientnet_b0_QNN-cs_8550: jp3j4wx3g +efficientnet_b0_QNN-cs_x_elite: jgo214oqp +efficientnet_b0_QNN-cs_auto_lemans_8255: jpv6199k5 +efficientnet_b0_QNN-cs_auto_lemans_8775: jgjv0wwvg +efficientnet_b0_QNN-cs_auto_lemans_8650: jpedrllo5 +efficientnet_b0_QNN-cs_auto_makena_8295: jgz3x44o5 +efficientnet_b0_QNN-cs_xr_8450: j5wed1135 +efficientnet_b0_ONNX: jg9l3xxwg +efficientnet_b0_ONNX_FP16: jp14dvv8p +detr_resnet50_dc5_TFLITE: j57yj77v5 +detr_resnet50_dc5_QNN: jp4lx9985 +detr_resnet50_dc5_QNN-cs_8_elite: jpxk7dd35 +detr_resnet50_dc5_QNN-cs_8550: j5mnwdddp +detr_resnet50_dc5_QNN-cs_x_elite: jgn6977k5 +detr_resnet50_dc5_QNN-cs_auto_lemans_8255: jprv4nn0g +detr_resnet50_dc5_QNN-cs_auto_lemans_8775: jp2k7vvrp +detr_resnet50_dc5_QNN-cs_auto_lemans_8650: jpy14778p +detr_resnet50_dc5_QNN-cs_auto_makena_8295: jp0z1vv95 +detr_resnet50_dc5_QNN-cs_xr_8450: jp8q344kp +detr_resnet50_dc5_ONNX: jgkel99wg +detr_resnet50_dc5_ONNX_FP16: j5q67mmnp +detr_resnet50_TFLITE: jglv011j5 +detr_resnet50_QNN: j56y3dd6p +detr_resnet50_QNN-cs_8_elite: jp3j4ww3g +detr_resnet50_QNN-cs_8550: jgo2144qp +detr_resnet50_QNN-cs_x_elite: jpv6197k5 +detr_resnet50_QNN-cs_auto_lemans_8255: jgjv0wqvg +detr_resnet50_QNN-cs_auto_lemans_8775: jpedrlyo5 +detr_resnet50_QNN-cs_auto_lemans_8650: jgz3x4no5 +detr_resnet50_QNN-cs_auto_makena_8295: j5wed1435 +detr_resnet50_QNN-cs_xr_8450: jg9l3xdwg +detr_resnet50_ONNX: jp14dv68p +detr_resnet50_ONNX_FP16: jgdxrz2rp +detr_resnet101_dc5_TFLITE: j57yj79v5 +detr_resnet101_dc5_QNN: jp4lx9385 +detr_resnet101_dc5_QNN-cs_8_elite: jpxk7dx35 +detr_resnet101_dc5_QNN-cs_8550: j5mnwd8dp +detr_resnet101_dc5_QNN-cs_x_elite: jgn697kk5 +detr_resnet101_dc5_QNN-cs_auto_lemans_8255: jprv4nw0g +detr_resnet101_dc5_QNN-cs_auto_lemans_8775: jp2k7verp +detr_resnet101_dc5_QNN-cs_auto_lemans_8650: jpy147m8p +detr_resnet101_dc5_QNN-cs_auto_makena_8295: jp0z1v695 +detr_resnet101_dc5_QNN-cs_xr_8450: jp8q341kp 
+detr_resnet101_dc5_ONNX: jgkel98wg +detr_resnet101_dc5_ONNX_FP16: j5q67mvnp +detr_resnet101_TFLITE: jglv01lj5 +detr_resnet101_QNN: j56y3dw6p +detr_resnet101_QNN-cs_8_elite: jp3j4w63g +detr_resnet101_QNN-cs_8550: jgo2148qp +detr_resnet101_QNN-cs_x_elite: jpv619dk5 +detr_resnet101_QNN-cs_auto_lemans_8255: jgjv0w7vg +detr_resnet101_QNN-cs_auto_lemans_8775: jpedrlzo5 +detr_resnet101_QNN-cs_auto_lemans_8650: jgz3x4mo5 +detr_resnet101_QNN-cs_auto_makena_8295: j5wed1735 +detr_resnet101_QNN-cs_xr_8450: jg9l3xmwg +detr_resnet101_ONNX: jp14dvj8p +detr_resnet101_ONNX_FP16: jgdxrz3rp +densenet121_TFLITE: j5wed17m5 +densenet121_QNN: jg9l3xm8g +densenet121_QNN-cs_8_elite: jp14dvj7p +densenet121_QNN-cs_8550: jgdxrz3zp +densenet121_QNN-cs_x_elite: j57yj7495 +densenet121_QNN-cs_auto_lemans_8255: jp4lx9115 +densenet121_QNN-cs_auto_lemans_8775: jpxk7d4l5 +densenet121_QNN-cs_auto_lemans_8650: j5mnwdm9p +densenet121_QNN-cs_auto_makena_8295: jgn697nq5 +densenet121_QNN-cs_xr_8450: jprv4n07g +densenet121_ONNX: jp2k7vwqp +densenet121_ONNX_FP16: jpy147xlp +deeplabv3_resnet50_TFLITE: jp0z1vjn5 +deeplabv3_resnet50_QNN: jp8q34xop +deeplabv3_resnet50_QNN-cs_8_elite: jgkel94ng +deeplabv3_resnet50_QNN-cs_8550: j5q67myop +deeplabv3_resnet50_QNN-cs_x_elite: jglv01ym5 +deeplabv3_resnet50_QNN-cs_auto_lemans_8255: j56y3d8yp +deeplabv3_resnet50_QNN-cs_auto_lemans_8775: jp3j4wzng +deeplabv3_resnet50_QNN-cs_auto_lemans_8650: jgo214lkp +deeplabv3_resnet50_QNN-cs_auto_makena_8295: jpv619lr5 +deeplabv3_resnet50_QNN-cs_xr_8450: jgjv0wreg +deeplabv3_resnet50_ONNX: jpedrl7v5 +deeplabv3_resnet50_ONNX_FP16: jgz3x4lx5 +deeplabv3_plus_mobilenet_TFLITE: j5wed1lm5 +deeplabv3_plus_mobilenet_QNN: jg9l3xz8g +deeplabv3_plus_mobilenet_QNN-cs_8_elite: jp14dvn7p +deeplabv3_plus_mobilenet_QNN-cs_8550: jgdxrzdzp +deeplabv3_plus_mobilenet_QNN-cs_x_elite: j57yj7e95 +deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8255: jp4lx9y15 +deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8775: jpxk7dll5 
+deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8650: j5mnwd09p +deeplabv3_plus_mobilenet_QNN-cs_auto_makena_8295: jgn697zq5 +deeplabv3_plus_mobilenet_QNN-cs_xr_8450: jprv4nl7g +deeplabv3_plus_mobilenet_ONNX: jp2k7vrqp +deeplabv3_plus_mobilenet_ONNX_FP16: jpy147olp +ddrnet23_slim_TFLITE: jp0z1vmn5 +ddrnet23_slim_QNN: jp8q34eop +ddrnet23_slim_QNN-cs_8_elite: jgkel92ng +ddrnet23_slim_QNN-cs_8550: j5q67mlop +ddrnet23_slim_QNN-cs_x_elite: jglv01wm5 +ddrnet23_slim_QNN-cs_auto_lemans_8255: j56y3doyp +ddrnet23_slim_QNN-cs_auto_lemans_8775: jp3j4wong +ddrnet23_slim_QNN-cs_auto_lemans_8650: jgo214dkp +ddrnet23_slim_QNN-cs_auto_makena_8295: jpv6192r5 +ddrnet23_slim_QNN-cs_xr_8450: jgjv0w3eg +ddrnet23_slim_ONNX: jpedrl6v5 +ddrnet23_slim_ONNX_FP16: jgz3x4zx5 +convnext_tiny_w8a16_quantized_TFLITE: j5wed1ym5 +convnext_tiny_w8a16_quantized_QNN: jg9l3xo8g +convnext_tiny_w8a16_quantized_QNN-cs_8_elite: jp14dvo7p +convnext_tiny_w8a16_quantized_QNN-cs_6490: jgdxrz6zp +convnext_tiny_w8a16_quantized_QNN-cs_8550: j57yj7o95 +convnext_tiny_w8a16_quantized_QNN-cs_x_elite: jp4lx9e15 +convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8255: jpxk7d0l5 +convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8775: j5mnwd99p +convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8650: jgn6971q5 +convnext_tiny_w8a16_quantized_QNN-cs_auto_makena_8295: jprv4nx7g +convnext_tiny_w8a16_quantized_QNN-cs_xr_8450: jp2k7voqp +convnext_tiny_w8a16_quantized_ONNX: jpy1478lp +convnext_tiny_TFLITE: jp0z1von5 +convnext_tiny_QNN: jp8q34jop +convnext_tiny_QNN-cs_8_elite: jgkel96ng +convnext_tiny_QNN-cs_8550: j5q67m4op +convnext_tiny_QNN-cs_x_elite: jglv018m5 +convnext_tiny_QNN-cs_auto_lemans_8255: j56y3dmyp +convnext_tiny_QNN-cs_auto_lemans_8775: jp3j4w7ng +convnext_tiny_QNN-cs_auto_lemans_8650: jgo214wkp +convnext_tiny_QNN-cs_auto_makena_8295: jpv619mr5 +convnext_tiny_QNN-cs_xr_8450: jgjv0wyeg +convnext_tiny_ONNX: jpedrlxv5 +convnext_tiny_ONNX_FP16: jgz3x4yx5 +aotgan_TFLITE: jg9l3x28g +aotgan_QNN: jp14dv17p 
+aotgan_QNN-cs_8_elite: jgdxrz4zp +aotgan_QNN-cs_8550: j57yj7n95 +aotgan_QNN-cs_x_elite: jp4lx9415 +aotgan_QNN-cs_auto_lemans_8255: jpxk7drl5 +aotgan_QNN-cs_auto_lemans_8775: j5mnwdk9p +aotgan_QNN-cs_auto_lemans_8650: jgn697qq5 +aotgan_QNN-cs_auto_makena_8295: jprv4nd7g +aotgan_QNN-cs_xr_8450: jp2k7vdqp +aotgan_ONNX: jpy1472lp +aotgan_ONNX_FP16: jp0z1v9n5 +xlsr_quantized_TFLITE: jp8q34rop +xlsr_quantized_QNN: jgkel90ng +xlsr_quantized_QNN-cs_8_elite: j5q67m1op +xlsr_quantized_QNN-cs_6490: jglv01qm5 +xlsr_quantized_QNN-cs_8550: j56y3d0yp +xlsr_quantized_QNN-cs_x_elite: jp3j4wrng +xlsr_quantized_QNN-cs_auto_lemans_8255: jgo2149kp +xlsr_quantized_QNN-cs_auto_lemans_8775: jpv619nr5 +xlsr_quantized_QNN-cs_auto_lemans_8650: jgjv0w8eg +xlsr_quantized_QNN-cs_auto_makena_8295: jpedrlnv5 +xlsr_quantized_QNN-cs_xr_8450: jgz3x40x5 +xlsr_quantized_ONNX: j5wed1rm5 +wideresnet50_quantized_TFLITE: jg9l3xq8g +wideresnet50_quantized_QNN: jp14dvm7p +wideresnet50_quantized_QNN-cs_8_elite: jgdxrzmzp +wideresnet50_quantized_QNN-cs_6490: j5wed1r45 +wideresnet50_quantized_QNN-cs_8550: jg9l3xqmg +wideresnet50_quantized_QNN-cs_x_elite: jp14dvmnp +wideresnet50_quantized_QNN-cs_auto_lemans_8255: jgdxrzm6p +wideresnet50_quantized_QNN-cs_auto_lemans_8775: j57yj78n5 +wideresnet50_quantized_QNN-cs_auto_lemans_8650: jp4lx9225 +wideresnet50_quantized_QNN-cs_auto_makena_8295: jpxk7dz85 +wideresnet50_quantized_QNN-cs_xr_8450: j5mnwdl7p +wideresnet50_quantized_ONNX: jgn697wj5 +vit_quantized_TFLITE: jprv4n7kg +vit_quantized_QNN: jp2k7vz6p +vit_quantized_QNN-cs_8_elite: jpy147y0p +vit_quantized_QNN-cs_6490: jp0z1v805 +vit_quantized_QNN-cs_8550: jp8q34dqp +vit_quantized_QNN-cs_x_elite: jgkel9wvg +vit_quantized_QNN-cs_auto_lemans_8255: j5q67mxep +vit_quantized_QNN-cs_auto_lemans_8775: jglv01925 +vit_quantized_QNN-cs_auto_lemans_8650: j56y3d9np +vit_quantized_QNN-cs_auto_makena_8295: jp3j4wlmg +vit_quantized_QNN-cs_xr_8450: jgo21471p +vit_quantized_ONNX: jpv619yz5 +squeezenet1_1_quantized_TFLITE: jgjv0w61g 
+squeezenet1_1_quantized_QNN: jpedrl085 +squeezenet1_1_quantized_QNN-cs_8_elite: jgz3x4q45 +squeezenet1_1_quantized_QNN-cs_6490: j5wed1045 +squeezenet1_1_quantized_QNN-cs_8550: jg9l3x7mg +squeezenet1_1_quantized_QNN-cs_x_elite: jp14dvknp +squeezenet1_1_quantized_QNN-cs_auto_lemans_8255: jgdxrzy6p +squeezenet1_1_quantized_QNN-cs_auto_lemans_8775: j57yj71n5 +squeezenet1_1_quantized_QNN-cs_auto_lemans_8650: jp4lx9625 +squeezenet1_1_quantized_QNN-cs_auto_makena_8295: jpxk7d885 +squeezenet1_1_quantized_QNN-cs_xr_8450: j5mnwd17p +squeezenet1_1_quantized_ONNX: jgn697dj5 +shufflenet_v2_quantized_TFLITE: jprv4nmkg +shufflenet_v2_quantized_QNN: jp2k7vq6p +shufflenet_v2_quantized_QNN-cs_8_elite: jpy147k0p +shufflenet_v2_quantized_QNN-cs_6490: jp0z1vw05 +shufflenet_v2_quantized_QNN-cs_8550: jp8q34nqp +shufflenet_v2_quantized_QNN-cs_x_elite: jgkel91vg +shufflenet_v2_quantized_QNN-cs_auto_lemans_8255: j5q67mnep +shufflenet_v2_quantized_QNN-cs_auto_lemans_8775: jglv01d25 +shufflenet_v2_quantized_QNN-cs_auto_lemans_8650: j56y3dxnp +shufflenet_v2_quantized_QNN-cs_auto_makena_8295: jp3j4wdmg +shufflenet_v2_quantized_QNN-cs_xr_8450: jgo214x1p +shufflenet_v2_quantized_ONNX: jpv6198z5 +sesr_m5_quantized_TFLITE: jpedrlq85 +sesr_m5_quantized_QNN: jgz3x4645 +sesr_m5_quantized_QNN-cs_8_elite: j5wed1k45 +sesr_m5_quantized_QNN-cs_6490: jg9l3xrmg +sesr_m5_quantized_QNN-cs_8550: jp14dv9np +sesr_m5_quantized_QNN-cs_x_elite: jgdxrzk6p +sesr_m5_quantized_QNN-cs_auto_lemans_8255: j57yj7mn5 +sesr_m5_quantized_QNN-cs_auto_lemans_8775: jp4lx9725 +sesr_m5_quantized_QNN-cs_auto_lemans_8650: jpxk7dq85 +sesr_m5_quantized_QNN-cs_auto_makena_8295: j5mnwd77p +sesr_m5_quantized_QNN-cs_xr_8450: jgn6974j5 +sesr_m5_quantized_ONNX: jprv4nrkg +resnext50_quantized_TFLITE: jp2k7v16p +resnext50_quantized_QNN: jpy147l0p +resnext50_quantized_QNN-cs_8_elite: jp0z1vn05 +resnext50_quantized_QNN-cs_6490: jp8q34lqp +resnext50_quantized_QNN-cs_8550: jgkel9jvg +resnext50_quantized_QNN-cs_x_elite: j5q67mjep 
+resnext50_quantized_QNN-cs_auto_lemans_8255: jglv01j25 +resnext50_quantized_QNN-cs_auto_lemans_8775: j56y3dknp +resnext50_quantized_QNN-cs_auto_lemans_8650: jp3j4wymg +resnext50_quantized_QNN-cs_auto_makena_8295: jgo214j1p +resnext50_quantized_QNN-cs_xr_8450: jpv619jz5 +resnext50_quantized_ONNX: jgjv0wj1g +resnext101_quantized_TFLITE: jpedrlj85 +resnext101_quantized_QNN: jgz3x4145 +resnext101_quantized_QNN-cs_8_elite: j5wed1j45 +resnext101_quantized_QNN-cs_6490: jg9l3x6mg +resnext101_quantized_QNN-cs_8550: jp14dvrnp +resnext101_quantized_QNN-cs_x_elite: jgdxrzj6p +resnext101_quantized_QNN-cs_auto_lemans_8255: j57yj7qn5 +resnext101_quantized_QNN-cs_auto_lemans_8775: jp4lx9z25 +resnext101_quantized_QNN-cs_auto_lemans_8650: jpxk7dw85 +resnext101_quantized_QNN-cs_auto_makena_8295: j5mnwdj7p +resnext101_quantized_QNN-cs_xr_8450: jgn697jj5 +resnext101_quantized_ONNX: jprv4nzkg +resnet50_quantized_TFLITE: jp2k7v26p +resnet50_quantized_QNN: jpy14790p +resnet50_quantized_QNN-cs_8_elite: jp0z1vy05 +resnet50_quantized_QNN-cs_6490: jp8q34oqp +resnet50_quantized_QNN-cs_8550: jgkel9zvg +resnet50_quantized_QNN-cs_x_elite: j5q67m8ep +resnet50_quantized_QNN-cs_auto_lemans_8255: jglv01n25 +resnet50_quantized_QNN-cs_auto_lemans_8775: j56y3d6np +resnet50_quantized_QNN-cs_auto_lemans_8650: jp3j4wkmg +resnet50_quantized_QNN-cs_auto_makena_8295: jgo214y1p +resnet50_quantized_QNN-cs_xr_8450: jpv6193z5 +resnet50_quantized_ONNX: jgjv0wx1g +resnet18_quantized_TFLITE: jpedrl985 +resnet18_quantized_QNN: jgz3x4e45 +resnet18_quantized_QNN-cs_8_elite: j5wed1o45 +resnet18_quantized_QNN-cs_6490: jg9l3xvmg +resnet18_quantized_QNN-cs_8550: jp14dv0np +resnet18_quantized_QNN-cs_x_elite: jgdxrzw6p +resnet18_quantized_QNN-cs_auto_lemans_8255: j5wed1oz5 +resnet18_quantized_QNN-cs_auto_lemans_8775: jg9l3xvqg +resnet18_quantized_QNN-cs_auto_lemans_8650: jp14dv0kp +resnet18_quantized_QNN-cs_auto_makena_8295: jgdxrzwkp +resnet18_quantized_QNN-cs_xr_8450: j57yj7zq5 +resnet18_quantized_ONNX: jp4lx9qq5 
+resnet101_quantized_TFLITE: jpxk7dvj5 +resnet101_quantized_QNN: j5mnwdryp +resnet101_quantized_QNN-cs_8_elite: jgn697yv5 +resnet101_quantized_QNN-cs_6490: jprv4nqvg +resnet101_quantized_QNN-cs_8550: jp2k7v6xp +resnet101_quantized_QNN-cs_x_elite: jpy147wrp +resnet101_quantized_QNN-cs_auto_lemans_8255: jp0z1vq25 +resnet101_quantized_QNN-cs_auto_lemans_8775: jp8q349zp +resnet101_quantized_QNN-cs_auto_lemans_8650: jgkel9nyg +resnet101_quantized_QNN-cs_auto_makena_8295: j5q67mk7p +resnet101_quantized_QNN-cs_xr_8450: jglv01ze5 +resnet101_quantized_ONNX: j56y3djvp +regnet_quantized_TFLITE: jp3j4w3xg +regnet_quantized_QNN: jgo21404p +regnet_quantized_QNN-cs_8_elite: jpv619o75 +regnet_quantized_QNN-cs_6490: jgjv0wm7g +regnet_quantized_QNN-cs_8550: jpedrl175 +regnet_quantized_QNN-cs_x_elite: jgz3x49z5 +regnet_quantized_QNN-cs_auto_lemans_8255: j5wed1vz5 +regnet_quantized_QNN-cs_auto_lemans_8775: jg9l3x1qg +regnet_quantized_QNN-cs_auto_lemans_8650: jp14dvlkp +regnet_quantized_QNN-cs_auto_makena_8295: jgdxrz9kp +regnet_quantized_QNN-cs_xr_8450: j57yj7wq5 +regnet_quantized_ONNX: jp4lx9oq5 +quicksrnetsmall_quantized_TFLITE: j5mnwd2yp +quicksrnetsmall_quantized_QNN: jgn6978v5 +quicksrnetsmall_quantized_QNN-cs_8_elite: jprv4njvg +quicksrnetsmall_quantized_QNN-cs_6490: jp2k7vnxp +quicksrnetsmall_quantized_QNN-cs_8550: jpy1470rp +quicksrnetsmall_quantized_QNN-cs_x_elite: jp0z1v725 +quicksrnetsmall_quantized_QNN-cs_auto_lemans_8255: jp8q34vzp +quicksrnetsmall_quantized_QNN-cs_auto_lemans_8775: jgkel9myg +quicksrnetsmall_quantized_QNN-cs_auto_lemans_8650: j5q67mo7p +quicksrnetsmall_quantized_QNN-cs_auto_makena_8295: jglv01re5 +quicksrnetsmall_quantized_QNN-cs_xr_8450: j56y3dlvp +quicksrnetsmall_quantized_ONNX: jp3j4w2xg +quicksrnetmedium_quantized_TFLITE: jgo214q4p +quicksrnetmedium_quantized_QNN: jpv619x75 +quicksrnetmedium_quantized_QNN-cs_8_elite: jgjv0w47g +quicksrnetmedium_quantized_QNN-cs_6490: jpedrl375 +quicksrnetmedium_quantized_QNN-cs_8550: jgz3x4kz5 
+quicksrnetmedium_quantized_QNN-cs_x_elite: j5wed1nz5 +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8255: jg9l3xeqg +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8775: jp14dvxkp +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8650: jgdxrzlkp +quicksrnetmedium_quantized_QNN-cs_auto_makena_8295: j57yj73q5 +quicksrnetmedium_quantized_QNN-cs_xr_8450: jp4lx90q5 +quicksrnetmedium_quantized_ONNX: jpxk7d2j5 +quicksrnetlarge_quantized_TFLITE: j5mnwdyyp +quicksrnetlarge_quantized_QNN: jgn69kvv5 +quicksrnetlarge_quantized_QNN-cs_8_elite: jprv4w3vg +quicksrnetlarge_quantized_QNN-cs_6490: jp2k7eyxp +quicksrnetlarge_quantized_QNN-cs_8550: jpy14m3rp +quicksrnetlarge_quantized_QNN-cs_x_elite: jp0z16025 +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8255: jp8q31yzp +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8775: jgkel8xyg +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8650: j5q67vq7p +quicksrnetlarge_quantized_QNN-cs_auto_makena_8295: jglv0lme5 +quicksrnetlarge_quantized_QNN-cs_xr_8450: j56y3w4vp +quicksrnetlarge_quantized_ONNX: jp3j460xg +mobilenet_v3_large_quantized_TFLITE: jgo21864p +mobilenet_v3_large_quantized_QNN: jpv617k75 +mobilenet_v3_large_quantized_QNN-cs_8_elite: jgjv0qn7g +mobilenet_v3_large_quantized_QNN-cs_6490: jpedrym75 +mobilenet_v3_large_quantized_QNN-cs_8550: jgz3xndz5 +mobilenet_v3_large_quantized_QNN-cs_x_elite: j5wed46z5 +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8255: jg9l3dnqg +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8775: jp14d6zkp +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8650: jgdxr21kp +mobilenet_v3_large_quantized_QNN-cs_auto_makena_8295: j57yj9rq5 +mobilenet_v3_large_quantized_QNN-cs_xr_8450: jp4lx3rq5 +mobilenet_v3_large_quantized_ONNX: jpxk7xoj5 +mobilenet_v2_quantized_TFLITE: j5mnw8xyp +mobilenet_v2_quantized_QNN: jgn69k6v5 +mobilenet_v2_quantized_QNN-cs_8_elite: jprv4wvvg +mobilenet_v2_quantized_QNN-cs_6490: jp2k7ekxp +mobilenet_v2_quantized_QNN-cs_8550: jpy14m1rp +mobilenet_v2_quantized_QNN-cs_x_elite: jp0z16z25 
+mobilenet_v2_quantized_QNN-cs_auto_lemans_8255: jp8q31qzp +mobilenet_v2_quantized_QNN-cs_auto_lemans_8775: jgkel8eyg +mobilenet_v2_quantized_QNN-cs_auto_lemans_8650: j5q67v67p +mobilenet_v2_quantized_QNN-cs_auto_makena_8295: jglv0lve5 +mobilenet_v2_quantized_QNN-cs_xr_8450: j56y3wyvp +mobilenet_v2_quantized_ONNX: jp3j46jxg +inception_v3_quantized_TFLITE: jgo21824p +inception_v3_quantized_QNN: jpv617675 +inception_v3_quantized_QNN-cs_8_elite: jgjv0qv7g +inception_v3_quantized_QNN-cs_6490: jpedryd75 +inception_v3_quantized_QNN-cs_8550: jgz3xn3z5 +inception_v3_quantized_QNN-cs_x_elite: j5wed4ez5 +inception_v3_quantized_QNN-cs_auto_lemans_8255: jg9l3dlqg +inception_v3_quantized_QNN-cs_auto_lemans_8775: jp14d64kp +inception_v3_quantized_QNN-cs_auto_lemans_8650: jgdxr2xkp +inception_v3_quantized_QNN-cs_auto_makena_8295: j5wed4ej5 +inception_v3_quantized_QNN-cs_xr_8450: jg9l3dlvg +inception_v3_quantized_ONNX: jp14d64lp +googlenet_quantized_TFLITE: j57yj92r5 +googlenet_quantized_QNN: jp4lx3nl5 +googlenet_quantized_QNN-cs_8_elite: jpxk7x995 +googlenet_quantized_QNN-cs_6490: j5mnw8eqp +googlenet_quantized_QNN-cs_8550: jgn69k0m5 +googlenet_quantized_QNN-cs_x_elite: jprv4w6eg +googlenet_quantized_QNN-cs_auto_lemans_8255: jp2k7exmp +googlenet_quantized_QNN-cs_auto_lemans_8775: jpy14mz4p +googlenet_quantized_QNN-cs_auto_lemans_8650: jp0z164e5 +googlenet_quantized_QNN-cs_auto_makena_8295: jp8q3128p +googlenet_quantized_QNN-cs_xr_8450: jgkel8vog +googlenet_quantized_ONNX: j5q67v0mp +fcn_resnet50_quantized_TFLITE: jglv0l4l5 +fcn_resnet50_quantized_QNN: j56y3w27p +fcn_resnet50_quantized_QNN-cs_8_elite: jp3j46nzg +fcn_resnet50_quantized_QNN-cs_6490: jgo218zdp +fcn_resnet50_quantized_QNN-cs_8550: jpv617qm5 +fcn_resnet50_quantized_QNN-cs_x_elite: jgjv0qd8g +fcn_resnet50_quantized_QNN-cs_auto_lemans_8255: jpedryo05 +fcn_resnet50_quantized_QNN-cs_auto_lemans_8775: jgz3xn265 +fcn_resnet50_quantized_QNN-cs_auto_lemans_8650: j5wed4wj5 +fcn_resnet50_quantized_QNN-cs_auto_makena_8295: 
jg9l3d0vg +fcn_resnet50_quantized_QNN-cs_xr_8450: jp14d62lp +fcn_resnet50_quantized_ONNX: jgdxr2nlp +densenet121_quantized_TFLITE: j57yj90r5 +densenet121_quantized_QNN: jp4lx3kl5 +densenet121_quantized_QNN-cs_8_elite: jpxk7xn95 +densenet121_quantized_QNN-cs_6490: j5mnw8qqp +densenet121_quantized_QNN-cs_8550: jgn69klm5 +densenet121_quantized_QNN-cs_x_elite: jprv4w8eg +densenet121_quantized_QNN-cs_auto_lemans_8255: jp2k7e0mp +densenet121_quantized_QNN-cs_auto_lemans_8775: jpy14mr4p +densenet121_quantized_QNN-cs_auto_lemans_8650: jp0z163e5 +densenet121_quantized_QNN-cs_auto_makena_8295: jp8q3108p +densenet121_quantized_QNN-cs_xr_8450: jgkel87og +densenet121_quantized_ONNX: j5q67vemp +deeplabv3_plus_mobilenet_quantized_TFLITE: jglv0l6l5 +deeplabv3_plus_mobilenet_quantized_QNN: j56y3we7p +deeplabv3_plus_mobilenet_quantized_QNN-cs_8_elite: jp3j46vzg +deeplabv3_plus_mobilenet_quantized_QNN-cs_6490: jgo218kdp +deeplabv3_plus_mobilenet_quantized_QNN-cs_8550: jpv6170m5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_x_elite: jgjv0qz8g +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8255: jpedrye05 +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8775: jgz3xno65 +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8650: j5wed42j5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_makena_8295: jg9l3djvg +deeplabv3_plus_mobilenet_quantized_QNN-cs_xr_8450: jp14d6ylp +deeplabv3_plus_mobilenet_quantized_ONNX: jgdxr2elp +convnext_tiny_w8a8_quantized_TFLITE: j57yj9lr5 +convnext_tiny_w8a8_quantized_QNN: jp4lx3dl5 +convnext_tiny_w8a8_quantized_QNN-cs_8_elite: jpxk7x695 +convnext_tiny_w8a8_quantized_QNN-cs_6490: j5mnw86qp +convnext_tiny_w8a8_quantized_QNN-cs_8550: jgn69kmm5 +convnext_tiny_w8a8_quantized_QNN-cs_x_elite: jprv4w2eg +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8255: jp2k7e9mp +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8775: jpy14mj4p +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8650: jp0z162e5 +convnext_tiny_w8a8_quantized_QNN-cs_auto_makena_8295: 
jp8q31m8p +convnext_tiny_w8a8_quantized_QNN-cs_xr_8450: jgkel8qog +convnext_tiny_w8a8_quantized_ONNX: j5q67vrmp diff --git a/qai_hub_models/scorecard/intermediates/inference-jobs.yaml b/qai_hub_models/scorecard/intermediates/inference-jobs.yaml new file mode 100644 index 00000000..d1451e9e --- /dev/null +++ b/qai_hub_models/scorecard/intermediates/inference-jobs.yaml @@ -0,0 +1,337 @@ +yolov8_seg_TFLITE: jgjv0qe8g +yolov8_seg_QNN: jpedryk05 +yolov8_seg_ONNX: jgz3xnr65 +yolov8_det_quantized_TFLITE: jgz3xn7k5 +yolov8_det_quantized_QNN: j5wed4d65 +yolov8_det_quantized_ONNX: jg9l3d3lg +yolov8_det_TFLITE: jgdxr27ep +yolov8_det_QNN: j57yj9vl5 +yolov8_det_ONNX: jp4lx3jv5 +yolov7_quantized_TFLITE: jprv4w99g +yolov7_quantized_QNN: jp2k7ej4p +yolov7_quantized_ONNX: jpy14mn7p +yolov7_TFLITE: jp2k7e4rp +yolov7_QNN: jpy14mq8p +yolov7_ONNX: jp0z16d95 +yolov6_TFLITE: jgkel89wg +yolov6_QNN: j5q67vmnp +yolov6_ONNX: jglv0l1j5 +yolov11_det_TFLITE: jp3j4663g +yolov11_det_QNN: jgo2188qp +yolov11_det_ONNX: jpv617dk5 +yolonas_quantized_TFLITE: j5wed4l35 +yolonas_TFLITE: jpedry6v5 +yolonas_QNN: jgz3xnzx5 +yolonas_ONNX: j5wed4ym5 +xlsr_TFLITE: jp14d617p +xlsr_QNN: jgdxr24zp +xlsr_ONNX: j57yj9n95 +wideresnet50_TFLITE: jpxk7xzl5 +wideresnet50_QNN: j5mnw8l9p +wideresnet50_ONNX: jgn69kwq5 +whisper_tiny_en_TFLITE_WhisperEncoder: jgn69k4j5 +whisper_tiny_en_TFLITE_WhisperDecoder: jprv4wrkg +whisper_tiny_en_QNN_WhisperEncoder: jp2k7e16p +whisper_tiny_en_QNN_WhisperDecoder: jpy14ml0p +whisper_small_en_TFLITE_WhisperEncoder: jgkel8nvg +whisper_small_en_TFLITE_WhisperDecoder: j5q67vkep +whisper_small_en_QNN_WhisperEncoder: jglv0lz25 +whisper_small_en_QNN_WhisperDecoder: j56y3wjnp +whisper_small_en_ONNX_WhisperEncoder: jp3j463mg +whisper_small_en_ONNX_WhisperDecoder: jgo21801p +whisper_base_en_TFLITE_WhisperEncoder: jgo21r64p +whisper_base_en_TFLITE_WhisperDecoder: jpv61dk75 +whisper_base_en_QNN_WhisperEncoder: jgjv07n7g +whisper_base_en_QNN_WhisperDecoder: jpedrzm75 
+whisper_base_en_ONNX_WhisperEncoder: jgz3xmdz5 +whisper_base_en_ONNX_WhisperDecoder: j5wed76z5 +vit_TFLITE: jgdxr3xkp +vit_ONNX: j57yj4yq5 +unet_segmentation_TFLITE: jp14dj2lp +unet_segmentation_QNN: jgdxr3nlp +unet_segmentation_ONNX: j57yj40r5 +trocr_TFLITE_TrOCREncoder: j5mnwm3qp +trocr_TFLITE_TrOCRDecoder: jgn69n3m5 +trocr_QNN_TrOCREncoder: jprv40eeg +trocr_QNN_TrOCRDecoder: jp2k7wlmp +trocr_ONNX_TrOCREncoder: jpy14x64p +trocr_ONNX_TrOCRDecoder: jp0z1jle5 +swin_tiny_TFLITE: jgkel4yog +swin_tiny_QNN: j5q67y2mp +swin_tiny_ONNX: jglv0xkl5 +swin_small_TFLITE: jgkel4l2g +swin_small_QNN: j5q67y74p +swin_small_ONNX: jglv0x085 +swin_base_TFLITE: jp3j49qlg +swin_base_QNN: jgo21rexp +swin_base_ONNX: jpv61dzj5 +squeezenet1_1_TFLITE: jgz3xmwk5 +squeezenet1_1_QNN: j5wed7865 +squeezenet1_1_ONNX: jg9l3mklg +sinet_TFLITE: jgdxr3zep +sinet_QNN: j5wed7135 +sinet_ONNX: jg9l3mxwg +shufflenet_v2_TFLITE: jgdxr32rp +shufflenet_v2_QNN: j57yj49v5 +shufflenet_v2_ONNX: jp4lx1385 +sesr_m5_TFLITE: j5mnwmmdp +sesr_m5_QNN: jgn69nnk5 +sesr_m5_ONNX: jprv4000g +sam_TFLITE_SAMDecoder: jgdxr36rp +sam_TFLITE_SAMEncoder: j5wed7ym5 +resnext50_TFLITE: jp14dj17p +resnext50_QNN: jgdxr34zp +resnext50_ONNX: j57yj4n95 +resnext101_TFLITE: jprv4077g +resnext101_QNN: jp2k7wzqp +resnext101_ONNX: jpy14xylp +resnet50_TFLITE: jp8q3xdop +resnet50_QNN: jgkel4wng +resnet50_ONNX: j5q67yxop +resnet18_TFLITE: jp8q3xlqp +resnet18_QNN: jgkel4jvg +resnet18_ONNX: j5q67yjep +resnet101_TFLITE: j56y376np +resnet101_QNN: jp3j49kmg +resnet101_ONNX: jgo21ry1p +regnet_TFLITE: jgjv07m1g +regnet_QNN: jpedrz185 +regnet_ONNX: jgz3xm945 +real_esrgan_x4plus_TFLITE: jg9l3memg +real_esrgan_x4plus_QNN: jp14djxnp +real_esrgan_x4plus_ONNX: jgdxr3l6p +real_esrgan_general_x4v3_TFLITE: jp14dnzkp +real_esrgan_general_x4v3_QNN: jgdxrd1kp +real_esrgan_general_x4v3_ONNX: j57yjerq5 +quicksrnetsmall_TFLITE: jpxk7lkj5 +quicksrnetsmall_QNN: j5mnw0nyp +quicksrnetsmall_ONNX: jgn69z0v5 +quicksrnetmedium_TFLITE: jp2k7r0xp +quicksrnetmedium_QNN: jpy14orrp 
+quicksrnetmedium_ONNX: jp0z1m325 +quicksrnetlarge_TFLITE: jp2k7r9mp +quicksrnetlarge_QNN: jpy14oj4p +quicksrnetlarge_ONNX: jp0z1m2e5 +posenet_mobilenet_quantized_TFLITE: jglv0y3l5 +posenet_mobilenet_quantized_QNN: j56y38n7p +posenet_mobilenet_TFLITE: jgo21lvdp +posenet_mobilenet_QNN: jpv61lwm5 +posenet_mobilenet_ONNX: jgjv0rl8g +openpose_TFLITE: j5wedldj5 +openpose_QNN: jg9l3z3vg +openpose_ONNX: jp14dndlp +openai_clip_TFLITE_CLIPTextEncoder: jg9l3zklg +openai_clip_TFLITE_CLIPImageEncoder: jp14dn72p +openai_clip_QNN_CLIPTextEncoder: jgdxrd8ep +openai_clip_QNN_CLIPImageEncoder: j57yjekl5 +openai_clip_ONNX_CLIPTextEncoder: jp4lxymv5 +openai_clip_ONNX_CLIPImageEncoder: jpxk7l315 +mobilenet_v3_small_TFLITE: jgn69z7r5 +mobilenet_v3_small_QNN: jprv4ln9g +mobilenet_v3_small_ONNX: jp2k7rv4p +mobilenet_v3_large_TFLITE: jprv4lw0g +mobilenet_v3_large_QNN: jp2k7rerp +mobilenet_v3_large_ONNX: jpy14om8p +mobilenet_v2_TFLITE: jp8q3exkp +mobilenet_v2_QNN: jgkel24wg +mobilenet_v2_ONNX: j5q67lynp +mnasnet05_TFLITE: j56y3886p +mnasnet05_QNN: jp3j4zz3g +mnasnet05_ONNX: jgo21llqp +midas_quantized_TFLITE: j5wedlz35 +midas_quantized_QNN: jg9l3z2wg +midas_TFLITE: jgz3xl0x5 +midas_QNN: j5wedlrm5 +midas_ONNX: jg9l3zq8g +mediapipe_selfie_TFLITE: jgdxrdyzp +mediapipe_selfie_QNN: j57yje195 +mediapipe_selfie_ONNX: jp4lxy615 +mediapipe_pose_TFLITE_MediaPipePoseDetector: jp14dnrnp +mediapipe_pose_TFLITE_MediaPipePoseLandmarkDetector: jgdxrdj6p +mediapipe_pose_ONNX_MediaPipePoseDetector: j57yjeqn5 +mediapipe_pose_ONNX_MediaPipePoseLandmarkDetector: jp4lxyz25 +mediapipe_hand_TFLITE_MediaPipeHandDetector: jpxk7lj85 +mediapipe_hand_TFLITE_MediaPipeHandLandmarkDetector: j5mnw027p +mediapipe_hand_ONNX_MediaPipeHandDetector: jgn69zyj5 +mediapipe_hand_ONNX_MediaPipeHandLandmarkDetector: jprv4lqkg +mediapipe_face_quantized_TFLITE_MediaPipeFaceDetector: jp0z1oz25 +mediapipe_face_quantized_TFLITE_MediaPipeFaceLandmarkDetector: jp8q3jqzp +mediapipe_face_quantized_QNN_MediaPipeFaceDetector: jgkel6eyg 
+mediapipe_face_quantized_QNN_MediaPipeFaceLandmarkDetector: j5q67467p +mediapipe_face_TFLITE_MediaPipeFaceDetector: jp3j4ovxg +mediapipe_face_TFLITE_MediaPipeFaceLandmarkDetector: jgo21dk4p +mediapipe_face_QNN_MediaPipeFaceDetector: jpv612075 +mediapipe_face_QNN_MediaPipeFaceLandmarkDetector: jgjv03z7g +mediapipe_face_ONNX_MediaPipeFaceDetector: jpedr6e75 +mediapipe_face_ONNX_MediaPipeFaceLandmarkDetector: jgz3xzoz5 +litehrnet_TFLITE: jgjv0327g +litehrnet_ONNX: jpedr6w75 +lama_dilated_TFLITE: jgjv03e8g +lama_dilated_QNN: jpedr6k05 +inception_v3_TFLITE: j5wedy9j5 +inception_v3_QNN: jg9l3o4vg +inception_v3_ONNX: jp14do8lp +huggingface_wavlm_base_plus_TFLITE: jpedr6r05 +hrnet_pose_quantized_TFLITE: jgdxr67lp +hrnet_pose_quantized_QNN: j5wedyx65 +hrnet_pose_TFLITE: jp14do72p +hrnet_pose_QNN: jgdxr68ep +hrnet_pose_ONNX: j57yjokl5 +googlenet_TFLITE: jpxk70d15 +googlenet_QNN: j5mnw9dwp +googlenet_ONNX: jgn6917r5 +gear_guard_net_quantized_TFLITE: jp8q3j1xp +gear_guard_net_quantized_QNN: jgkel682g +gear_guard_net_quantized_ONNX: j5q674v4p +gear_guard_net_TFLITE: jp8q3jxkp +gear_guard_net_QNN: jgkel64wg +gear_guard_net_ONNX: j5q674ynp +foot_track_net_quantized_TFLITE: jgo21dlqp +foot_track_net_quantized_QNN: jpv6122k5 +foot_track_net_quantized_ONNX: jgjv033vg +foot_track_net_TFLITE: jgz3xzyo5 +foot_track_net_QNN: j5wedyz35 +foot_track_net_ONNX: jg9l3o2wg +ffnet_78s_quantized_TFLITE: jg9l3oq8g +ffnet_78s_quantized_ONNX: jp14dom7p +ffnet_78s_lowres_TFLITE: j57yjo195 +ffnet_78s_lowres_QNN: jp4lxe615 +ffnet_78s_lowres_ONNX: jpxk708l5 +ffnet_78s_TFLITE: jprv4xr7g +ffnet_78s_QNN: jp2k7o1qp +ffnet_78s_ONNX: jpy148llp +ffnet_54s_quantized_TFLITE: j5q674jop +ffnet_54s_quantized_ONNX: jglv0wnm5 +ffnet_54s_TFLITE: jgkel6nvg +ffnet_54s_QNN: j5q674kep +ffnet_54s_ONNX: jglv0wz25 +ffnet_40s_quantized_TFLITE: jpv612xz5 +ffnet_40s_quantized_ONNX: jgjv0341g +ffnet_40s_TFLITE: jgz3xyd45 +ffnet_40s_QNN: j5wedz645 +ffnet_40s_ONNX: jg9l32nmg +ffnet_122ns_lowres_TFLITE: jgdxr4x6p 
+ffnet_122ns_lowres_QNN: j5wedzez5 +ffnet_122ns_lowres_ONNX: jg9l32lqg +fcn_resnet50_TFLITE: jgdxr4nkp +fcn_resnet50_QNN: j57yjn2q5 +fcn_resnet50_ONNX: jp4lx4nq5 +fastsam_x_QNN: j5mnwkqyp +fastsam_x_ONNX: jgn69qmv5 +fastsam_s_QNN: jp2k7dlxp +fastsam_s_ONNX: jpy1426rp +facemap_3dmm_TFLITE: jprv4dyeg +facemap_3dmm_QNN: jp2k7dmmp +facemap_3dmm_ONNX: jpy142d4p +face_det_lite_TFLITE: jgkel0log +face_det_lite_QNN: j5q6717mp +face_det_lite_ONNX: jglv080l5 +face_body_net_TFLITE: jp3j47qzg +face_body_net_QNN: jgo21wedp +face_body_net_ONNX: jpv61mzm5 +face_attrib_net_TFLITE: jpedrx205 +face_attrib_net_QNN: jgz3xyw65 +face_attrib_net_ONNX: j5wedzxj5 +esrgan_TFLITE: jgz3xy8k5 +esrgan_QNN: j5wedz165 +esrgan_ONNX: jg9l32xlg +efficientvit_l2_cls_TFLITE: jgdxr42ep +efficientvit_l2_cls_QNN: j57yjn9l5 +efficientvit_l2_cls_ONNX: jp4lx43v5 +efficientvit_b2_cls_TFLITE: j5mnwkmwp +efficientvit_b2_cls_QNN: jgn69qnr5 +efficientvit_b2_cls_ONNX: jprv4d09g +efficientnet_b4_TFLITE: jgn69qzk5 +efficientnet_b4_QNN: jprv4dl0g +efficientnet_b4_ONNX: jp2k7drrp +efficientnet_b0_TFLITE: jp8q3rjkp +efficientnet_b0_QNN: jgkel06wg +efficientnet_b0_ONNX: j5q6714np +detr_resnet50_dc5_TFLITE: j56y3mm6p +detr_resnet50_dc5_ONNX: jp3j4773g +detr_resnet50_TFLITE: jpv61myk5 +detr_resnet50_ONNX: jgjv0y6vg +detr_resnet101_dc5_TFLITE: jgo21wxkp +detr_resnet101_dc5_ONNX: jpv61m8r5 +detr_resnet101_TFLITE: jpedrxjv5 +detr_resnet101_ONNX: jgz3xy1x5 +densenet121_TFLITE: jg9l32v8g +densenet121_QNN: jp14d107p +densenet121_ONNX: jgdxr4wzp +deeplabv3_resnet50_TFLITE: jg9l3218g +deeplabv3_plus_mobilenet_TFLITE: jgz3xyk45 +deeplabv3_plus_mobilenet_QNN: j5wedzn45 +deeplabv3_plus_mobilenet_ONNX: jg9l32emg +ddrnet23_slim_TFLITE: jgdxrm16p +ddrnet23_slim_ONNX: j57yj8rn5 +convnext_tiny_w8a16_quantized_QNN: jgjv08v1g +convnext_tiny_TFLITE: jgz3x0245 +convnext_tiny_QNN: j5wedrw45 +convnext_tiny_ONNX: jg9l3q0mg +aotgan_TFLITE: jgz3x0oz5 +aotgan_QNN: j5wedr2z5 +xlsr_quantized_TFLITE: jp4lx2dq5 +xlsr_quantized_QNN: jpxk7z6j5 
+xlsr_quantized_ONNX: j5mnwl6yp +wideresnet50_quantized_TFLITE: jpy14ydrp +wideresnet50_quantized_QNN: jp0z1xr25 +wideresnet50_quantized_ONNX: jp8q3k7zp +vit_quantized_QNN: jp8q3k38p +vit_quantized_ONNX: jgkelklog +squeezenet1_1_quantized_TFLITE: jpv61nzm5 +squeezenet1_1_quantized_QNN: jgjv08k8g +squeezenet1_1_quantized_ONNX: jpedrn405 +shufflenet_v2_quantized_TFLITE: jgdxrm0lp +shufflenet_v2_quantized_QNN: j57yj8kr5 +shufflenet_v2_quantized_ONNX: jp4lx2ml5 +sesr_m5_quantized_TFLITE: jpxk7zd15 +sesr_m5_quantized_QNN: j5mnwldwp +sesr_m5_quantized_ONNX: jgn69w7r5 +resnext50_quantized_TFLITE: jp0z1x665 +resnext50_quantized_QNN: jp8q3k1xp +resnext50_quantized_ONNX: jgkelk82g +resnext101_quantized_TFLITE: jp3j4r9lg +resnext101_quantized_QNN: jgo219rxp +resnext101_quantized_ONNX: jpv61ndj5 +resnet50_quantized_TFLITE: j5wedry65 +resnet50_quantized_QNN: jg9l3qolg +resnet50_quantized_ONNX: jp14dmo2p +resnet18_quantized_TFLITE: j57yj8nv5 +resnet18_quantized_QNN: jp4lx2485 +resnet18_quantized_ONNX: jpxk7zr35 +resnet101_quantized_TFLITE: jpy14yy8p +resnet101_quantized_QNN: jp0z1xx95 +resnet101_quantized_ONNX: jp8q3kkkp +regnet_quantized_TFLITE: j56y3096p +regnet_quantized_QNN: jp3j4rl3g +regnet_quantized_ONNX: jgo2197qp +quicksrnetsmall_quantized_TFLITE: jgo219jkp +quicksrnetsmall_quantized_QNN: jpv61njr5 +quicksrnetsmall_quantized_ONNX: jgjv08jeg +quicksrnetmedium_quantized_TFLITE: jg9l3qv8g +quicksrnetmedium_quantized_QNN: jp14dm07p +quicksrnetmedium_quantized_ONNX: jgdxrmwzp +quicksrnetlarge_quantized_TFLITE: j5mnwl29p +quicksrnetlarge_quantized_QNN: jgn69wyq5 +quicksrnetlarge_quantized_ONNX: jprv47q7g +mobilenet_v3_large_quantized_TFLITE: jprv47jkg +mobilenet_v3_large_quantized_QNN: jp2k7zn6p +mobilenet_v3_large_quantized_ONNX: jpy14y00p +mobilenet_v2_quantized_TFLITE: j5q67x6ep +mobilenet_v2_quantized_QNN: jglv09v25 +mobilenet_v2_quantized_ONNX: j56y39ynp +inception_v3_quantized_TFLITE: jpedr0o85 +inception_v3_quantized_QNN: jgz3xq245 +inception_v3_quantized_ONNX: 
j5wed0w45 +googlenet_quantized_TFLITE: j5wed02z5 +googlenet_quantized_QNN: jg9l37jqg +googlenet_quantized_ONNX: jp14dkykp +fcn_resnet50_quantized_TFLITE: jpxk786j5 +fcn_resnet50_quantized_QNN: j5mnw16yp +fcn_resnet50_quantized_ONNX: jgn69d3v5 +densenet121_quantized_QNN: jp0z18r25 +densenet121_quantized_ONNX: jp8q3d7zp +deeplabv3_plus_mobilenet_quantized_TFLITE: j56y393vp +deeplabv3_plus_mobilenet_quantized_QNN: jp3j4l4xg +deeplabv3_plus_mobilenet_quantized_ONNX: jgo21714p +convnext_tiny_w8a8_quantized_QNN: jgo217edp diff --git a/qai_hub_models/scorecard/intermediates/profile-jobs.yaml b/qai_hub_models/scorecard/intermediates/profile-jobs.yaml new file mode 100644 index 00000000..411b8121 --- /dev/null +++ b/qai_hub_models/scorecard/intermediates/profile-jobs.yaml @@ -0,0 +1,3180 @@ +yolov8_seg_TFLITE-cs_8_gen_2: jpv617rm5 +yolov8_seg_TFLITE-cs_8_gen_3: jgjv0q28g +yolov8_seg_TFLITE-cs_8_elite: jpedryw05 +yolov8_seg_TFLITE-cs_8550: jgz3xnj65 +yolov8_seg_TFLITE-cs_auto_lemans_8255: j5wed43j5 +yolov8_seg_TFLITE-cs_auto_lemans_8775: jg9l3dyvg +yolov8_seg_TFLITE-cs_auto_lemans_8650: jp14d6wlp +yolov8_seg_TFLITE-cs_auto_makena_8295: jgdxr2qlp +yolov8_seg_TFLITE-cs_xr_8450: j57yj9xr5 +yolov8_seg_QNN-cs_8_gen_2: jp4lx3vl5 +yolov8_seg_QNN-cs_8_gen_3: jpxk7xy95 +yolov8_seg_QNN-cs_8_elite: j5mnw83qp +yolov8_seg_QNN-cs_8550: jgn69k3m5 +yolov8_seg_QNN-cs_x_elite: jprv4weeg +yolov8_seg_QNN-cs_auto_lemans_8255: jp2k7elmp +yolov8_seg_QNN-cs_auto_lemans_8775: jpy14m64p +yolov8_seg_QNN-cs_auto_lemans_8650: jp0z16le5 +yolov8_seg_QNN-cs_auto_makena_8295: jp8q31z8p +yolov8_seg_QNN-cs_xr_8450: jgkel83og +yolov8_seg_ONNX-cs_8_gen_2: j5q67v3mp +yolov8_seg_ONNX-cs_8_gen_3: jglv0l3l5 +yolov8_seg_ONNX-cs_8_elite: j56y3wn7p +yolov8_seg_ONNX-cs_x_elite: jp3j46ezg +yolov8_seg_ONNX_DML_GPU-cs_x_elite: jgo2183dp +yolov8_seg_ONNX_DML_NPU-cs_x_elite: jpv617vm5 +yolov8_det_quantized_TFLITE-cs_8_gen_2: j5wed4qj5 +yolov8_det_quantized_TFLITE-cs_8_gen_3: jg9l3dwvg 
+yolov8_det_quantized_TFLITE-cs_8_elite: jp14d6elp +yolov8_det_quantized_TFLITE-cs_6490: jgdxr2olp +yolov8_det_quantized_TFLITE-cs_8250: j5wed4965 +yolov8_det_quantized_TFLITE-cs_8550: jg9l3d4lg +yolov8_det_quantized_TFLITE-cs_auto_lemans_8255: jp14d682p +yolov8_det_quantized_TFLITE-cs_auto_lemans_8775: jgdxr2vep +yolov8_det_quantized_TFLITE-cs_auto_lemans_8650: j57yj9dl5 +yolov8_det_quantized_TFLITE-cs_auto_makena_8295: jp4lx3wv5 +yolov8_det_quantized_TFLITE-cs_xr_8450: jpxk7x115 +yolov8_det_quantized_QNN-cs_8_gen_2: j5mnw8zwp +yolov8_det_quantized_QNN-cs_8_gen_3: jgn69ker5 +yolov8_det_quantized_QNN-cs_8_elite: jprv4wy9g +yolov8_det_quantized_QNN-cs_6490: jp2k7em4p +yolov8_det_quantized_QNN-cs_8550: jpy14md7p +yolov8_det_quantized_QNN-cs_x_elite: jp0z16r65 +yolov8_det_quantized_QNN-cs_auto_lemans_8255: jp8q317xp +yolov8_det_quantized_QNN-cs_auto_lemans_8775: jgkel8y2g +yolov8_det_quantized_QNN-cs_auto_lemans_8650: j5q67v24p +yolov8_det_quantized_QNN-cs_auto_makena_8295: jglv0lk85 +yolov8_det_quantized_QNN-cs_xr_8450: j56y3w10p +yolov8_det_quantized_ONNX-cs_8_gen_2: jp3j46mlg +yolov8_det_quantized_ONNX-cs_8_gen_3: jgo218vxp +yolov8_det_quantized_ONNX-cs_8_elite: jpv617wj5 +yolov8_det_quantized_ONNX-cs_x_elite: jgjv0qlxg +yolov8_det_quantized_ONNX_DML_NPU-cs_x_elite: jpedryv15 +yolov8_det_TFLITE-cs_8_gen_2: jp14d6d2p +yolov8_det_TFLITE-cs_8_gen_3: jgdxr2rep +yolov8_det_TFLITE-cs_8_elite: j57yj9jl5 +yolov8_det_TFLITE-cs_8550: jp4lx3xv5 +yolov8_det_TFLITE-cs_auto_lemans_8255: jpxk7x715 +yolov8_det_TFLITE-cs_auto_lemans_8775: j5mnw8wwp +yolov8_det_TFLITE-cs_auto_lemans_8650: jgn69k9r5 +yolov8_det_TFLITE-cs_auto_makena_8295: jprv4w49g +yolov8_det_TFLITE-cs_xr_8450: jp2k7e74p +yolov8_det_QNN-cs_8_gen_2: jpy14m47p +yolov8_det_QNN-cs_8_gen_3: jp0z16165 +yolov8_det_QNN-cs_8_elite: jp8q313xp +yolov8_det_QNN-cs_8550: jgkel8l2g +yolov8_det_QNN-cs_x_elite: j5q67v74p +yolov8_det_QNN-cs_auto_lemans_8255: jglv0l085 +yolov8_det_QNN-cs_auto_lemans_8775: j56y3w30p 
+yolov8_det_QNN-cs_auto_lemans_8650: jp3j464lg +yolov8_det_QNN-cs_auto_makena_8295: jgo2181xp +yolov8_det_QNN-cs_xr_8450: jpv6171j5 +yolov8_det_ONNX-cs_8_gen_2: jgjv0q0xg +yolov8_det_ONNX-cs_8_gen_3: jpedryr15 +yolov8_det_ONNX-cs_8_elite: jgz3xnxk5 +yolov8_det_ONNX-cs_x_elite: j5wed4m65 +yolov8_det_ONNX_DML_GPU-cs_x_elite: jg9l3d9lg +yolov8_det_ONNX_DML_NPU-cs_x_elite: jp14d6q2p +yolov7_quantized_TFLITE-cs_8_gen_2: jpxk7xe15 +yolov7_quantized_TFLITE-cs_8_gen_3: j5mnw8vwp +yolov7_quantized_TFLITE-cs_8_elite: jgn69krr5 +yolov7_quantized_TFLITE-cs_6490: jprv4w19g +yolov7_quantized_TFLITE-cs_8250: jp2k7e34p +yolov7_quantized_TFLITE-cs_8550: jpy14mv7p +yolov7_quantized_TFLITE-cs_auto_lemans_8255: jp0z16e65 +yolov7_quantized_TFLITE-cs_auto_lemans_8775: jp8q31wxp +yolov7_quantized_TFLITE-cs_auto_lemans_8650: jgkel8r2g +yolov7_quantized_TFLITE-cs_auto_makena_8295: j5q67v94p +yolov7_quantized_TFLITE-cs_xr_8450: jglv0le85 +yolov7_quantized_QNN-cs_8_gen_2: j56y3wq0p +yolov7_quantized_QNN-cs_8_gen_3: jp3j46qlg +yolov7_quantized_QNN-cs_8_elite: jgo218exp +yolov7_quantized_QNN-cs_6490: jpv617zj5 +yolov7_quantized_QNN-cs_8550: jgjv0qkxg +yolov7_quantized_QNN-cs_x_elite: jpedry415 +yolov7_quantized_QNN-cs_auto_lemans_8255: jgz3xnvk5 +yolov7_quantized_QNN-cs_auto_lemans_8775: j5wed4x65 +yolov7_quantized_QNN-cs_auto_lemans_8650: jg9l3d8lg +yolov7_quantized_QNN-cs_auto_makena_8295: jp14d632p +yolov7_quantized_QNN-cs_xr_8450: jgdxr20ep +yolov7_quantized_ONNX-cs_8_gen_2: j57yj96l5 +yolov7_quantized_ONNX-cs_8_gen_3: jp4lx38v5 +yolov7_quantized_ONNX-cs_8_elite: jpxk7xm15 +yolov7_quantized_ONNX-cs_x_elite: j5mnw84wp +yolov7_quantized_ONNX_DML_NPU-cs_x_elite: jgn69kxr5 +yolov7_TFLITE-cs_8_gen_2: jp8q318xp +yolov7_TFLITE-cs_8_gen_3: jgkel8d2g +yolov7_TFLITE-cs_8_elite: j5q67vw4p +yolov7_TFLITE-cs_8550: jglv0l785 +yolov7_TFLITE-cs_auto_lemans_8255: j56y3wv0p +yolov7_TFLITE-cs_auto_lemans_8775: jp3j468lg +yolov7_TFLITE-cs_auto_lemans_8650: jgo218mxp +yolov7_TFLITE-cs_auto_makena_8295: 
jpv6174j5 +yolov7_TFLITE-cs_xr_8450: jgjv0q1xg +yolov7_QNN-cs_8_gen_2: jpedry215 +yolov7_QNN-cs_8_gen_3: jgz3xnwk5 +yolov7_QNN-cs_8_elite: j5wed4865 +yolov7_QNN-cs_8550: jg9l3dklg +yolov7_QNN-cs_x_elite: jp14d672p +yolov7_QNN-cs_auto_lemans_8255: jgdxr28ep +yolov7_QNN-cs_auto_lemans_8775: j5wed4835 +yolov7_QNN-cs_auto_lemans_8650: jg9l3dkwg +yolov7_QNN-cs_auto_makena_8295: jp14d678p +yolov7_QNN-cs_xr_8450: jgdxr28rp +yolov7_ONNX-cs_8_gen_2: j57yj9kv5 +yolov7_ONNX-cs_8_gen_3: jp4lx3m85 +yolov7_ONNX-cs_8_elite: jpxk7x335 +yolov7_ONNX-cs_x_elite: j5mnw8odp +yolov7_ONNX_DML_GPU-cs_x_elite: jgn69kok5 +yolov7_ONNX_DML_NPU-cs_x_elite: jprv4wo0g +yolov6_TFLITE-cs_8_gen_2: jp8q316kp +yolov6_TFLITE-cs_8_gen_3: jgkel8owg +yolov6_TFLITE-cs_8_elite: j5q67vznp +yolov6_TFLITE-cs_8550: jglv0loj5 +yolov6_TFLITE-cs_auto_lemans_8255: j56y3wr6p +yolov6_TFLITE-cs_auto_lemans_8775: jp3j46x3g +yolov6_TFLITE-cs_auto_lemans_8650: jgo218oqp +yolov6_TFLITE-cs_auto_makena_8295: jpv6179k5 +yolov6_TFLITE-cs_xr_8450: jgjv0qwvg +yolov6_QNN-cs_8_gen_2: jpedrylo5 +yolov6_QNN-cs_8_gen_3: jgz3xn4o5 +yolov6_QNN-cs_8_elite: j5wed4135 +yolov6_QNN-cs_8550: jg9l3dxwg +yolov6_QNN-cs_x_elite: jp14d6v8p +yolov6_QNN-cs_auto_lemans_8255: jgdxr2zrp +yolov6_QNN-cs_auto_lemans_8775: j57yj97v5 +yolov6_QNN-cs_auto_lemans_8650: jp4lx3985 +yolov6_QNN-cs_auto_makena_8295: jpxk7xd35 +yolov6_QNN-cs_xr_8450: j5mnw8ddp +yolov6_ONNX-cs_8_gen_2: jgn69k7k5 +yolov6_ONNX-cs_8_gen_3: jprv4wn0g +yolov6_ONNX-cs_8_elite: jp2k7evrp +yolov6_ONNX-cs_x_elite: jpy14m78p +yolov6_ONNX_DML_GPU-cs_x_elite: jp0z16v95 +yolov6_ONNX_DML_NPU-cs_x_elite: jp8q314kp +yolov11_det_TFLITE-cs_8_gen_2: j56y3wd6p +yolov11_det_TFLITE-cs_8_gen_3: jp3j46w3g +yolov11_det_TFLITE-cs_8_elite: jgo2184qp +yolov11_det_TFLITE-cs_8550: jpv6177k5 +yolov11_det_TFLITE-cs_auto_lemans_8255: jgjv0qqvg +yolov11_det_TFLITE-cs_auto_lemans_8775: jpedryyo5 +yolov11_det_TFLITE-cs_auto_lemans_8650: jgz3xnno5 +yolov11_det_TFLITE-cs_auto_makena_8295: j5wed4435 
+yolov11_det_TFLITE-cs_xr_8450: jg9l3ddwg +yolov11_det_QNN-cs_8_gen_2: jp14d668p +yolov11_det_QNN-cs_8_gen_3: jgdxr22rp +yolov11_det_QNN-cs_8_elite: j57yj99v5 +yolov11_det_QNN-cs_8550: jp4lx3385 +yolov11_det_QNN-cs_x_elite: jpxk7xx35 +yolov11_det_QNN-cs_auto_lemans_8255: j5mnw88dp +yolov11_det_QNN-cs_auto_lemans_8775: jgn69kkk5 +yolov11_det_QNN-cs_auto_lemans_8650: jprv4ww0g +yolov11_det_QNN-cs_auto_makena_8295: jp2k7eerp +yolov11_det_QNN-cs_xr_8450: jpy14mm8p +yolov11_det_ONNX-cs_8_gen_2: jp0z16695 +yolov11_det_ONNX-cs_8_gen_3: jp8q311kp +yolov11_det_ONNX-cs_8_elite: jgkel88wg +yolov11_det_ONNX-cs_x_elite: j5q67vvnp +yolov11_det_ONNX_DML_GPU-cs_x_elite: jglv0llj5 +yolov11_det_ONNX_DML_NPU-cs_x_elite: j56y3ww6p +yolonas_quantized_TFLITE-cs_8_gen_2: jgjv0q7vg +yolonas_quantized_TFLITE-cs_8_gen_3: jpedryzo5 +yolonas_quantized_TFLITE-cs_8_elite: jgz3xnmo5 +yolonas_quantized_TFLITE-cs_6490: j5wed4735 +yolonas_quantized_TFLITE-cs_8250: jg9l3dmwg +yolonas_quantized_TFLITE-cs_8550: jp14d6j8p +yolonas_quantized_TFLITE-cs_auto_lemans_8255: jgdxr23rp +yolonas_quantized_TFLITE-cs_auto_lemans_8775: j57yj94v5 +yolonas_quantized_TFLITE-cs_auto_lemans_8650: jp4lx3185 +yolonas_quantized_TFLITE-cs_auto_makena_8295: jpxk7x435 +yolonas_quantized_TFLITE-cs_xr_8450: j5mnw8mdp +yolonas_quantized_QNN-cs_8_gen_2: jgn69knk5 +yolonas_quantized_QNN-cs_8_gen_3: jprv4w00g +yolonas_quantized_QNN-cs_8_elite: jp2k7ewrp +yolonas_quantized_QNN-cs_6490: jpy14mx8p +yolonas_quantized_QNN-cs_8550: jp0z16j95 +yolonas_quantized_QNN-cs_x_elite: jp8q31xkp +yolonas_quantized_QNN-cs_auto_lemans_8255: jgkel84wg +yolonas_quantized_QNN-cs_auto_lemans_8775: j5q67vynp +yolonas_quantized_QNN-cs_auto_lemans_8650: jglv0lxj5 +yolonas_quantized_QNN-cs_auto_makena_8295: j56y3w76p +yolonas_quantized_QNN-cs_xr_8450: jp3j4693g +yolonas_quantized_ONNX-cs_8_gen_2: jgo218rqp +yolonas_quantized_ONNX-cs_8_gen_3: jpv617lk5 +yolonas_quantized_ONNX-cs_8_elite: jgjv0qrvg +yolonas_quantized_ONNX-cs_x_elite: jpedry7o5 
+yolonas_quantized_ONNX_DML_NPU-cs_x_elite: jgz3xnlo5 +yolonas_TFLITE-cs_8_gen_2: jg9l3dzwg +yolonas_TFLITE-cs_8_gen_3: jp14d6n8p +yolonas_TFLITE-cs_8_elite: jgdxr2drp +yolonas_TFLITE-cs_8550: j5wed4lm5 +yolonas_TFLITE-cs_auto_lemans_8255: jg9l3dz8g +yolonas_TFLITE-cs_auto_lemans_8775: jp14d6n7p +yolonas_TFLITE-cs_auto_lemans_8650: jgdxr2dzp +yolonas_TFLITE-cs_auto_makena_8295: j57yj9e95 +yolonas_TFLITE-cs_xr_8450: jp4lx3y15 +yolonas_QNN-cs_8_gen_2: jpxk7xll5 +yolonas_QNN-cs_8_gen_3: j5mnw809p +yolonas_QNN-cs_8_elite: jgn69kzq5 +yolonas_QNN-cs_8550: jprv4wl7g +yolonas_QNN-cs_x_elite: jp2k7erqp +yolonas_QNN-cs_auto_lemans_8255: jpy14molp +yolonas_QNN-cs_auto_lemans_8775: jp0z16mn5 +yolonas_QNN-cs_auto_lemans_8650: jp8q31eop +yolonas_QNN-cs_auto_makena_8295: jgkel82ng +yolonas_QNN-cs_xr_8450: j5q67vlop +yolonas_ONNX-cs_8_gen_2: jglv0lwm5 +yolonas_ONNX-cs_8_gen_3: j56y3woyp +yolonas_ONNX-cs_8_elite: jp3j46ong +yolonas_ONNX-cs_x_elite: jgo218dkp +yolonas_ONNX_DML_GPU-cs_x_elite: jpv6172r5 +yolonas_ONNX_DML_NPU-cs_x_elite: jgjv0q3eg +xlsr_TFLITE-cs_8_gen_2: jg9l3do8g +xlsr_TFLITE-cs_8_gen_3: jp14d6o7p +xlsr_TFLITE-cs_8_elite: jgdxr26zp +xlsr_TFLITE-cs_8550: j57yj9o95 +xlsr_TFLITE-cs_auto_lemans_8255: jp4lx3e15 +xlsr_TFLITE-cs_auto_lemans_8775: jpxk7x0l5 +xlsr_TFLITE-cs_auto_lemans_8650: j5mnw899p +xlsr_TFLITE-cs_auto_makena_8295: jgn69k1q5 +xlsr_TFLITE-cs_xr_8450: jprv4wx7g +xlsr_QNN-cs_8_gen_2: jp2k7eoqp +xlsr_QNN-cs_8_gen_3: jpy14m8lp +xlsr_QNN-cs_8_elite: jp0z16on5 +xlsr_QNN-cs_8550: jp8q31jop +xlsr_QNN-cs_x_elite: jgkel86ng +xlsr_QNN-cs_auto_lemans_8255: j5q67v4op +xlsr_QNN-cs_auto_lemans_8775: jglv0l8m5 +xlsr_QNN-cs_auto_lemans_8650: j56y3wmyp +xlsr_QNN-cs_auto_makena_8295: jp3j467ng +xlsr_QNN-cs_xr_8450: jgo218wkp +xlsr_ONNX-cs_8_gen_2: jpv617mr5 +xlsr_ONNX-cs_8_gen_3: jgjv0qyeg +xlsr_ONNX-cs_8_elite: jpedryxv5 +xlsr_ONNX-cs_x_elite: jgz3xnyx5 +xlsr_ONNX_DML_GPU-cs_x_elite: j5wed4zm5 +xlsr_ONNX_DML_NPU-cs_x_elite: jg9l3d28g +wideresnet50_TFLITE-cs_8_gen_2: 
jp4lx3415 +wideresnet50_TFLITE-cs_8_gen_3: jpxk7xrl5 +wideresnet50_TFLITE-cs_8_elite: j5mnw8k9p +wideresnet50_TFLITE-cs_8550: jgn69kqq5 +wideresnet50_TFLITE-cs_auto_lemans_8255: jprv4wd7g +wideresnet50_TFLITE-cs_auto_lemans_8775: jp2k7edqp +wideresnet50_TFLITE-cs_auto_lemans_8650: jpy14m2lp +wideresnet50_TFLITE-cs_auto_makena_8295: jp0z169n5 +wideresnet50_TFLITE-cs_xr_8450: jp8q31rop +wideresnet50_QNN-cs_8_gen_2: jgkel80ng +wideresnet50_QNN-cs_8_gen_3: j5q67v1op +wideresnet50_QNN-cs_8_elite: jglv0lqm5 +wideresnet50_QNN-cs_8550: j56y3w0yp +wideresnet50_QNN-cs_x_elite: jp3j46rng +wideresnet50_QNN-cs_auto_lemans_8255: jgo2189kp +wideresnet50_QNN-cs_auto_lemans_8775: jpv617nr5 +wideresnet50_QNN-cs_auto_lemans_8650: jgjv0q8eg +wideresnet50_QNN-cs_auto_makena_8295: jpedrynv5 +wideresnet50_QNN-cs_xr_8450: jgz3xn0x5 +wideresnet50_ONNX-cs_8_gen_2: j5wed4rm5 +wideresnet50_ONNX-cs_8_gen_3: jg9l3dq8g +wideresnet50_ONNX-cs_8_elite: jp14d6m7p +wideresnet50_ONNX-cs_x_elite: jgdxr2mzp +wideresnet50_ONNX_DML_GPU-cs_x_elite: j57yj9895 +wideresnet50_ONNX_DML_NPU-cs_x_elite: jp4lx3215 +whisper_tiny_en_TFLITE-cs_8_gen_2_WhisperEncoder: jprv4w77g +whisper_tiny_en_TFLITE-cs_8_gen_2_WhisperDecoder: jp2k7ezqp +whisper_tiny_en_TFLITE-cs_8_gen_3_WhisperEncoder: jpy14mylp +whisper_tiny_en_TFLITE-cs_8_gen_3_WhisperDecoder: jp0z16xn5 +whisper_tiny_en_TFLITE-cs_8_elite_WhisperEncoder: jp8q31kop +whisper_tiny_en_TFLITE-cs_8_elite_WhisperDecoder: jgkel8kng +whisper_tiny_en_TFLITE-cs_8550_WhisperEncoder: j5q67vdop +whisper_tiny_en_TFLITE-cs_8550_WhisperDecoder: jglv0l9m5 +whisper_tiny_en_TFLITE-cs_auto_lemans_8255_WhisperEncoder: j56y3w9yp +whisper_tiny_en_TFLITE-cs_auto_lemans_8255_WhisperDecoder: jp3j46lng +whisper_tiny_en_TFLITE-cs_auto_lemans_8775_WhisperEncoder: jgo2187kp +whisper_tiny_en_TFLITE-cs_auto_lemans_8775_WhisperDecoder: jpv617yr5 +whisper_tiny_en_TFLITE-cs_auto_lemans_8650_WhisperEncoder: jgjv0q6eg +whisper_tiny_en_TFLITE-cs_auto_lemans_8650_WhisperDecoder: jpedry0v5 
+whisper_tiny_en_TFLITE-cs_auto_makena_8295_WhisperEncoder: jgz3xnqx5 +whisper_tiny_en_TFLITE-cs_auto_makena_8295_WhisperDecoder: j5wed40m5 +whisper_tiny_en_TFLITE-cs_xr_8450_WhisperEncoder: jg9l3d78g +whisper_tiny_en_TFLITE-cs_xr_8450_WhisperDecoder: jp14d6k7p +whisper_tiny_en_QNN-cs_8_gen_2_WhisperEncoder: jgdxr2yzp +whisper_tiny_en_QNN-cs_8_gen_2_WhisperDecoder: j5wed4045 +whisper_tiny_en_QNN-cs_8_gen_3_WhisperEncoder: jg9l3d7mg +whisper_tiny_en_QNN-cs_8_gen_3_WhisperDecoder: jp14d6knp +whisper_tiny_en_QNN-cs_8_elite_WhisperEncoder: jgdxr2y6p +whisper_tiny_en_QNN-cs_8_elite_WhisperDecoder: j57yj91n5 +whisper_tiny_en_QNN-cs_8550_WhisperEncoder: jp4lx3625 +whisper_tiny_en_QNN-cs_8550_WhisperDecoder: jpxk7x885 +whisper_tiny_en_QNN-cs_x_elite_WhisperEncoder: j5mnw817p +whisper_tiny_en_QNN-cs_x_elite_WhisperDecoder: jgn69kdj5 +whisper_tiny_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jprv4wmkg +whisper_tiny_en_QNN-cs_auto_lemans_8255_WhisperDecoder: jp2k7eq6p +whisper_tiny_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jpy14mk0p +whisper_tiny_en_QNN-cs_auto_lemans_8775_WhisperDecoder: jp0z16w05 +whisper_tiny_en_QNN-cs_auto_lemans_8650_WhisperEncoder: jp8q31nqp +whisper_tiny_en_QNN-cs_auto_lemans_8650_WhisperDecoder: jgkel81vg +whisper_tiny_en_QNN-cs_auto_makena_8295_WhisperEncoder: j5q67vnep +whisper_tiny_en_QNN-cs_auto_makena_8295_WhisperDecoder: jglv0ld25 +whisper_tiny_en_QNN-cs_xr_8450_WhisperEncoder: jp3j46dmg +whisper_tiny_en_QNN-cs_xr_8450_WhisperDecoder: jgo218x1p +whisper_tiny_en_ONNX-cs_8_gen_2_WhisperEncoder: jpv6178z5 +whisper_tiny_en_ONNX-cs_8_gen_2_WhisperDecoder: jgjv0q91g +whisper_tiny_en_ONNX-cs_8_gen_3_WhisperEncoder: jpedryq85 +whisper_tiny_en_ONNX-cs_8_gen_3_WhisperDecoder: jgz3xn645 +whisper_tiny_en_ONNX-cs_8_elite_WhisperEncoder: j5wed4k45 +whisper_tiny_en_ONNX-cs_8_elite_WhisperDecoder: jg9l3drmg +whisper_tiny_en_ONNX-cs_x_elite_WhisperEncoder: jp14d69np +whisper_tiny_en_ONNX-cs_x_elite_WhisperDecoder: jgdxr2k6p 
+whisper_tiny_en_ONNX_DML_GPU-cs_x_elite_WhisperEncoder: j57yj9mn5 +whisper_tiny_en_ONNX_DML_GPU-cs_x_elite_WhisperDecoder: jp4lx3725 +whisper_tiny_en_ONNX_DML_NPU-cs_x_elite_WhisperEncoder: jpxk7xq85 +whisper_tiny_en_ONNX_DML_NPU-cs_x_elite_WhisperDecoder: j5mnw877p +whisper_small_en_TFLITE-cs_8_gen_2_WhisperEncoder: jp0z16n05 +whisper_small_en_TFLITE-cs_8_gen_2_WhisperDecoder: jp8q31lqp +whisper_small_en_TFLITE-cs_8_gen_3_WhisperEncoder: jgkel8jvg +whisper_small_en_TFLITE-cs_8_gen_3_WhisperDecoder: j5q67vjep +whisper_small_en_TFLITE-cs_8_elite_WhisperEncoder: jglv0lj25 +whisper_small_en_TFLITE-cs_8_elite_WhisperDecoder: j56y3wknp +whisper_small_en_TFLITE-cs_8550_WhisperEncoder: jp3j46ymg +whisper_small_en_TFLITE-cs_8550_WhisperDecoder: jgo218j1p +whisper_small_en_TFLITE-cs_auto_lemans_8255_WhisperEncoder: jpv617jz5 +whisper_small_en_TFLITE-cs_auto_lemans_8255_WhisperDecoder: jgjv0qj1g +whisper_small_en_TFLITE-cs_auto_lemans_8775_WhisperEncoder: jpedryj85 +whisper_small_en_TFLITE-cs_auto_lemans_8775_WhisperDecoder: jgz3xn145 +whisper_small_en_TFLITE-cs_auto_lemans_8650_WhisperEncoder: j5wed4j45 +whisper_small_en_TFLITE-cs_auto_lemans_8650_WhisperDecoder: jg9l3d6mg +whisper_small_en_TFLITE-cs_auto_makena_8295_WhisperEncoder: jp14d6rnp +whisper_small_en_TFLITE-cs_auto_makena_8295_WhisperDecoder: jgdxr2j6p +whisper_small_en_TFLITE-cs_xr_8450_WhisperEncoder: j57yj9qn5 +whisper_small_en_TFLITE-cs_xr_8450_WhisperDecoder: jp4lx3z25 +whisper_small_en_QNN-cs_8_gen_2_WhisperEncoder: jpxk7xw85 +whisper_small_en_QNN-cs_8_gen_2_WhisperDecoder: j5mnw8j7p +whisper_small_en_QNN-cs_8_gen_3_WhisperEncoder: jgn69kjj5 +whisper_small_en_QNN-cs_8_gen_3_WhisperDecoder: jprv4wzkg +whisper_small_en_QNN-cs_8_elite_WhisperEncoder: jp2k7e26p +whisper_small_en_QNN-cs_8_elite_WhisperDecoder: jpy14m90p +whisper_small_en_QNN-cs_8550_WhisperEncoder: jp0z16y05 +whisper_small_en_QNN-cs_8550_WhisperDecoder: jp8q31oqp +whisper_small_en_QNN-cs_x_elite_WhisperEncoder: jgkel8zvg 
+whisper_small_en_QNN-cs_x_elite_WhisperDecoder: j5q67v8ep +whisper_small_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jglv0ln25 +whisper_small_en_QNN-cs_auto_lemans_8255_WhisperDecoder: j56y3w6np +whisper_small_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jp3j46kmg +whisper_small_en_QNN-cs_auto_lemans_8775_WhisperDecoder: jgo218y1p +whisper_small_en_QNN-cs_auto_lemans_8650_WhisperEncoder: jpv6173z5 +whisper_small_en_QNN-cs_auto_lemans_8650_WhisperDecoder: jgjv0qx1g +whisper_small_en_QNN-cs_auto_makena_8295_WhisperEncoder: jpedry985 +whisper_small_en_QNN-cs_auto_makena_8295_WhisperDecoder: jgz3xne45 +whisper_small_en_QNN-cs_xr_8450_WhisperEncoder: j5wed4o45 +whisper_small_en_QNN-cs_xr_8450_WhisperDecoder: jg9l3dvmg +whisper_small_en_ONNX-cs_8_gen_2_WhisperEncoder: jp14d60np +whisper_small_en_ONNX-cs_8_gen_2_WhisperDecoder: jgdxr2w6p +whisper_small_en_ONNX-cs_8_gen_3_WhisperEncoder: j57yj9zn5 +whisper_small_en_ONNX-cs_8_gen_3_WhisperDecoder: jp4lx3q25 +whisper_small_en_ONNX-cs_8_elite_WhisperEncoder: jpxk7xv85 +whisper_small_en_ONNX-cs_8_elite_WhisperDecoder: j5mnw8r7p +whisper_small_en_ONNX-cs_x_elite_WhisperEncoder: jgn69k2j5 +whisper_small_en_ONNX-cs_x_elite_WhisperDecoder: jprv4wkkg +whisper_small_en_ONNX_DML_GPU-cs_x_elite_WhisperEncoder: jp2k7e86p +whisper_small_en_ONNX_DML_GPU-cs_x_elite_WhisperDecoder: jpy14me0p +whisper_small_en_ONNX_DML_NPU-cs_x_elite_WhisperEncoder: jp0z16q05 +whisper_small_en_ONNX_DML_NPU-cs_x_elite_WhisperDecoder: jp8q319qp +whisper_base_en_TFLITE-cs_8_gen_2_WhisperEncoder: jgjv0qm1g +whisper_base_en_TFLITE-cs_8_gen_2_WhisperDecoder: jpedry185 +whisper_base_en_TFLITE-cs_8_gen_3_WhisperEncoder: jgz3xn945 +whisper_base_en_TFLITE-cs_8_gen_3_WhisperDecoder: j5wed4v45 +whisper_base_en_TFLITE-cs_8_elite_WhisperEncoder: jg9l3d1mg +whisper_base_en_TFLITE-cs_8_elite_WhisperDecoder: jp14d6lnp +whisper_base_en_TFLITE-cs_8550_WhisperEncoder: jgdxr296p +whisper_base_en_TFLITE-cs_8550_WhisperDecoder: j5wed4vz5 
+whisper_base_en_TFLITE-cs_auto_lemans_8255_WhisperEncoder: jg9l3d1qg +whisper_base_en_TFLITE-cs_auto_lemans_8255_WhisperDecoder: jp14d6lkp +whisper_base_en_TFLITE-cs_auto_lemans_8775_WhisperEncoder: jgdxr29kp +whisper_base_en_TFLITE-cs_auto_lemans_8775_WhisperDecoder: j57yj9wq5 +whisper_base_en_TFLITE-cs_auto_lemans_8650_WhisperEncoder: jp4lx3oq5 +whisper_base_en_TFLITE-cs_auto_lemans_8650_WhisperDecoder: jpxk7xjj5 +whisper_base_en_TFLITE-cs_auto_makena_8295_WhisperEncoder: j5mnw82yp +whisper_base_en_TFLITE-cs_auto_makena_8295_WhisperDecoder: jgn69k8v5 +whisper_base_en_TFLITE-cs_xr_8450_WhisperEncoder: jprv4wjvg +whisper_base_en_TFLITE-cs_xr_8450_WhisperDecoder: jp2k7enxp +whisper_base_en_QNN-cs_8_gen_2_WhisperEncoder: jpy14m0rp +whisper_base_en_QNN-cs_8_gen_2_WhisperDecoder: jp0z16725 +whisper_base_en_QNN-cs_8_gen_3_WhisperEncoder: jp8q31vzp +whisper_base_en_QNN-cs_8_gen_3_WhisperDecoder: jgkel8myg +whisper_base_en_QNN-cs_8_elite_WhisperEncoder: j5q67vo7p +whisper_base_en_QNN-cs_8_elite_WhisperDecoder: jglv0lre5 +whisper_base_en_QNN-cs_8550_WhisperEncoder: j56y3wlvp +whisper_base_en_QNN-cs_8550_WhisperDecoder: jp3j462xg +whisper_base_en_QNN-cs_x_elite_WhisperEncoder: jgo218q4p +whisper_base_en_QNN-cs_x_elite_WhisperDecoder: jpv617x75 +whisper_base_en_QNN-cs_auto_lemans_8255_WhisperEncoder: jgjv0q47g +whisper_base_en_QNN-cs_auto_lemans_8255_WhisperDecoder: jpedry375 +whisper_base_en_QNN-cs_auto_lemans_8775_WhisperEncoder: jgz3xnkz5 +whisper_base_en_QNN-cs_auto_lemans_8775_WhisperDecoder: j5wed4nz5 +whisper_base_en_QNN-cs_auto_lemans_8650_WhisperEncoder: jg9l3deqg +whisper_base_en_QNN-cs_auto_lemans_8650_WhisperDecoder: jp14d6xkp +whisper_base_en_QNN-cs_auto_makena_8295_WhisperEncoder: jgdxr2lkp +whisper_base_en_QNN-cs_auto_makena_8295_WhisperDecoder: j57yj93q5 +whisper_base_en_QNN-cs_xr_8450_WhisperEncoder: jp4lx30q5 +whisper_base_en_QNN-cs_xr_8450_WhisperDecoder: jpxk7x2j5 +whisper_base_en_ONNX-cs_8_gen_2_WhisperEncoder: j5mnw8yyp 
+whisper_base_en_ONNX-cs_8_gen_2_WhisperDecoder: jgn69nvv5 +whisper_base_en_ONNX-cs_8_gen_3_WhisperEncoder: jprv403vg +whisper_base_en_ONNX-cs_8_gen_3_WhisperDecoder: jp2k7wyxp +whisper_base_en_ONNX-cs_8_elite_WhisperEncoder: jpy14x3rp +whisper_base_en_ONNX-cs_8_elite_WhisperDecoder: jp0z1j025 +whisper_base_en_ONNX-cs_x_elite_WhisperEncoder: jp8q3xyzp +whisper_base_en_ONNX-cs_x_elite_WhisperDecoder: jgkel4xyg +whisper_base_en_ONNX_DML_GPU-cs_x_elite_WhisperEncoder: j5q67yq7p +whisper_base_en_ONNX_DML_GPU-cs_x_elite_WhisperDecoder: jglv0xme5 +whisper_base_en_ONNX_DML_NPU-cs_x_elite_WhisperEncoder: j56y374vp +whisper_base_en_ONNX_DML_NPU-cs_x_elite_WhisperDecoder: jp3j490xg +vit_TFLITE-cs_8_gen_2: jg9l3mnqg +vit_TFLITE-cs_8_gen_3: jp14djzkp +vit_TFLITE-cs_8_elite: jgdxr31kp +vit_TFLITE-cs_8550: j57yj4rq5 +vit_TFLITE-cs_auto_lemans_8255: jp4lx1rq5 +vit_TFLITE-cs_auto_lemans_8775: j5mnwmxyp +vit_TFLITE-cs_auto_lemans_8650: jgn69n6v5 +vit_TFLITE-cs_auto_makena_8295: jprv40vvg +vit_TFLITE-cs_xr_8450: jp2k7wkxp +vit_QNN-cs_8_gen_2: jpy14x1rp +vit_QNN-cs_8_gen_3: jp0z1jz25 +vit_QNN-cs_8_elite: jp8q3xqzp +vit_QNN-cs_8550: jgkel4eyg +vit_QNN-cs_x_elite: j5q67y67p +vit_QNN-cs_auto_lemans_8255: jglv0xve5 +vit_QNN-cs_auto_lemans_8775: j56y37yvp +vit_QNN-cs_auto_lemans_8650: jp3j49jxg +vit_QNN-cs_auto_makena_8295: jgo21r24p +vit_QNN-cs_xr_8450: jpv61d675 +vit_ONNX-cs_8_gen_2: jgjv07v7g +vit_ONNX-cs_8_gen_3: jpedrzd75 +vit_ONNX-cs_8_elite: jgz3xm3z5 +vit_ONNX-cs_x_elite: j5wed7ez5 +vit_ONNX_DML_GPU-cs_x_elite: jg9l3mlqg +vit_ONNX_DML_NPU-cs_x_elite: jp14dj4kp +unet_segmentation_TFLITE-cs_8_gen_2: jp4lx1lq5 +unet_segmentation_TFLITE-cs_8_gen_3: jpxk74kj5 +unet_segmentation_TFLITE-cs_8_elite: j5mnwmnyp +unet_segmentation_TFLITE-cs_8550: jgn69n0v5 +unet_segmentation_TFLITE-cs_auto_lemans_8255: jprv406vg +unet_segmentation_TFLITE-cs_auto_lemans_8775: jp2k7wxxp +unet_segmentation_TFLITE-cs_auto_lemans_8650: jpy14xzrp +unet_segmentation_TFLITE-cs_auto_makena_8295: jp0z1j425 
+unet_segmentation_TFLITE-cs_xr_8450: jp8q3x2zp +unet_segmentation_QNN-cs_8_gen_2: jgkel4vyg +unet_segmentation_QNN-cs_8_gen_3: j5q67y07p +unet_segmentation_QNN-cs_8_elite: jglv0x4e5 +unet_segmentation_QNN-cs_8550: j56y372vp +unet_segmentation_QNN-cs_x_elite: jp3j49nxg +unet_segmentation_QNN-cs_auto_lemans_8255: jgo21rz4p +unet_segmentation_QNN-cs_auto_lemans_8775: jpv61dq75 +unet_segmentation_QNN-cs_auto_lemans_8650: jgjv07d7g +unet_segmentation_QNN-cs_auto_makena_8295: jpedrzo75 +unet_segmentation_QNN-cs_xr_8450: jgz3xm2z5 +unet_segmentation_ONNX-cs_8_gen_2: j5wed7wz5 +unet_segmentation_ONNX-cs_8_gen_3: jg9l3m0qg +unet_segmentation_ONNX-cs_8_elite: jp14dj2kp +unet_segmentation_ONNX-cs_x_elite: jgdxr3nkp +unet_segmentation_ONNX_DML_GPU-cs_x_elite: j5wed7wj5 +unet_segmentation_ONNX_DML_NPU-cs_x_elite: jg9l3m0vg +trocr_TFLITE-cs_8_gen_2_TrOCREncoder: jp4lx1kl5 +trocr_TFLITE-cs_8_gen_2_TrOCRDecoder: jpxk74n95 +trocr_TFLITE-cs_8_gen_3_TrOCREncoder: j5mnwmqqp +trocr_TFLITE-cs_8_gen_3_TrOCRDecoder: jgn69nlm5 +trocr_TFLITE-cs_8_elite_TrOCREncoder: jprv408eg +trocr_TFLITE-cs_8_elite_TrOCRDecoder: jp2k7w0mp +trocr_TFLITE-cs_8550_TrOCREncoder: jpy14xr4p +trocr_TFLITE-cs_8550_TrOCRDecoder: jp0z1j3e5 +trocr_TFLITE-cs_auto_lemans_8255_TrOCREncoder: jp8q3x08p +trocr_TFLITE-cs_auto_lemans_8255_TrOCRDecoder: jgkel47og +trocr_TFLITE-cs_auto_lemans_8775_TrOCREncoder: j5q67yemp +trocr_TFLITE-cs_auto_lemans_8775_TrOCRDecoder: jglv0x6l5 +trocr_TFLITE-cs_auto_lemans_8650_TrOCREncoder: j56y37e7p +trocr_TFLITE-cs_auto_lemans_8650_TrOCRDecoder: jp3j49vzg +trocr_TFLITE-cs_auto_makena_8295_TrOCREncoder: jgo21rkdp +trocr_TFLITE-cs_auto_makena_8295_TrOCRDecoder: jpv61d0m5 +trocr_TFLITE-cs_xr_8450_TrOCREncoder: jgjv07z8g +trocr_TFLITE-cs_xr_8450_TrOCRDecoder: jpedrze05 +trocr_QNN-cs_8_gen_2_TrOCREncoder: jgz3xmo65 +trocr_QNN-cs_8_gen_2_TrOCRDecoder: j5wed72j5 +trocr_QNN-cs_8_gen_3_TrOCREncoder: jg9l3mjvg +trocr_QNN-cs_8_gen_3_TrOCRDecoder: jp14djylp +trocr_QNN-cs_8_elite_TrOCREncoder: 
jgdxr3elp +trocr_QNN-cs_8_elite_TrOCRDecoder: j57yj4lr5 +trocr_QNN-cs_8550_TrOCREncoder: jp4lx1dl5 +trocr_QNN-cs_8550_TrOCRDecoder: jpxk74695 +trocr_QNN-cs_x_elite_TrOCREncoder: j5mnwm6qp +trocr_QNN-cs_x_elite_TrOCRDecoder: jgn69nmm5 +trocr_QNN-cs_auto_lemans_8255_TrOCREncoder: jprv402eg +trocr_QNN-cs_auto_lemans_8255_TrOCRDecoder: jp2k7w9mp +trocr_QNN-cs_auto_lemans_8775_TrOCREncoder: jpy14xj4p +trocr_QNN-cs_auto_lemans_8775_TrOCRDecoder: jp0z1j2e5 +trocr_QNN-cs_auto_lemans_8650_TrOCREncoder: jp8q3xm8p +trocr_QNN-cs_auto_lemans_8650_TrOCRDecoder: jgkel4qog +trocr_QNN-cs_auto_makena_8295_TrOCREncoder: j5q67yrmp +trocr_QNN-cs_auto_makena_8295_TrOCRDecoder: jglv0x2l5 +trocr_QNN-cs_xr_8450_TrOCREncoder: j56y37z7p +trocr_QNN-cs_xr_8450_TrOCRDecoder: jp3j491zg +trocr_ONNX-cs_8_gen_2_TrOCREncoder: jgo21rndp +trocr_ONNX-cs_8_gen_2_TrOCRDecoder: jpv61drm5 +trocr_ONNX-cs_8_gen_3_TrOCREncoder: jgjv0728g +trocr_ONNX-cs_8_gen_3_TrOCRDecoder: jpedrzw05 +trocr_ONNX-cs_8_elite_TrOCREncoder: jgz3xmj65 +trocr_ONNX-cs_8_elite_TrOCRDecoder: j5wed73j5 +trocr_ONNX-cs_x_elite_TrOCREncoder: jg9l3myvg +trocr_ONNX-cs_x_elite_TrOCRDecoder: jp14djwlp +trocr_ONNX_DML_GPU-cs_x_elite_TrOCREncoder: jgdxr3qlp +trocr_ONNX_DML_GPU-cs_x_elite_TrOCRDecoder: j57yj4xr5 +trocr_ONNX_DML_NPU-cs_x_elite_TrOCREncoder: jp4lx1vl5 +trocr_ONNX_DML_NPU-cs_x_elite_TrOCRDecoder: jpxk74y95 +swin_tiny_TFLITE-cs_8_gen_2: jp8q3xz8p +swin_tiny_TFLITE-cs_8_gen_3: jgkel43og +swin_tiny_TFLITE-cs_8_elite: j5q67y3mp +swin_tiny_TFLITE-cs_8550: jglv0x3l5 +swin_tiny_TFLITE-cs_auto_lemans_8255: j56y37n7p +swin_tiny_TFLITE-cs_auto_lemans_8775: jp3j49ezg +swin_tiny_TFLITE-cs_auto_lemans_8650: jgo21r3dp +swin_tiny_TFLITE-cs_auto_makena_8295: jpv61dvm5 +swin_tiny_TFLITE-cs_xr_8450: jgjv07e8g +swin_tiny_QNN-cs_8_gen_2: jpedrzk05 +swin_tiny_QNN-cs_8_gen_3: jgz3xmr65 +swin_tiny_QNN-cs_8_elite: j5wed7qj5 +swin_tiny_QNN-cs_8550: jg9l3mwvg +swin_tiny_QNN-cs_x_elite: jp14djelp +swin_tiny_QNN-cs_auto_lemans_8255: jgdxr3olp 
+swin_tiny_QNN-cs_auto_lemans_8775: j57yj4dr5 +swin_tiny_QNN-cs_auto_lemans_8650: jp4lx1wl5 +swin_tiny_QNN-cs_auto_makena_8295: jpxk74195 +swin_tiny_QNN-cs_xr_8450: j5mnwmzqp +swin_tiny_ONNX-cs_8_gen_2: jgn69nem5 +swin_tiny_ONNX-cs_8_gen_3: jprv40yeg +swin_tiny_ONNX-cs_8_elite: jp2k7wmmp +swin_tiny_ONNX-cs_x_elite: jpy14xd4p +swin_tiny_ONNX_DML_GPU-cs_x_elite: jp0z1jre5 +swin_tiny_ONNX_DML_NPU-cs_x_elite: jp8q3x78p +swin_small_TFLITE-cs_8_gen_2: j56y3717p +swin_small_TFLITE-cs_8_gen_3: jp3j49mzg +swin_small_TFLITE-cs_8_elite: jgo21rvdp +swin_small_TFLITE-cs_8550: jpv61dwm5 +swin_small_TFLITE-cs_auto_lemans_8255: jgjv07l8g +swin_small_TFLITE-cs_auto_lemans_8775: jpedrzv05 +swin_small_TFLITE-cs_auto_lemans_8650: jgz3xm765 +swin_small_TFLITE-cs_auto_makena_8295: j5wed79j5 +swin_small_TFLITE-cs_xr_8450: jg9l3m4vg +swin_small_QNN-cs_8_gen_2: jp14dj8lp +swin_small_QNN-cs_8_gen_3: jgdxr3vlp +swin_small_QNN-cs_8_elite: j5wed7d65 +swin_small_QNN-cs_8550: jg9l3m3lg +swin_small_QNN-cs_x_elite: jp14djd2p +swin_small_QNN-cs_auto_lemans_8255: jgdxr3rep +swin_small_QNN-cs_auto_lemans_8775: j57yj4jl5 +swin_small_QNN-cs_auto_lemans_8650: jp4lx1xv5 +swin_small_QNN-cs_auto_makena_8295: jpxk74715 +swin_small_QNN-cs_xr_8450: j5mnwmwwp +swin_small_ONNX-cs_8_gen_2: jgn69n9r5 +swin_small_ONNX-cs_8_gen_3: jprv4049g +swin_small_ONNX-cs_8_elite: jp2k7w74p +swin_small_ONNX-cs_x_elite: jpy14x47p +swin_small_ONNX_DML_GPU-cs_x_elite: jp0z1j165 +swin_small_ONNX_DML_NPU-cs_x_elite: jp8q3x3xp +swin_base_TFLITE-cs_8_gen_2: j56y3730p +swin_base_TFLITE-cs_8_gen_3: jp3j494lg +swin_base_TFLITE-cs_8_elite: jgo21r1xp +swin_base_TFLITE-cs_8550: jpv61d1j5 +swin_base_TFLITE-cs_auto_lemans_8255: jgjv070xg +swin_base_TFLITE-cs_auto_lemans_8775: jpedrzr15 +swin_base_TFLITE-cs_auto_lemans_8650: jgz3xmxk5 +swin_base_TFLITE-cs_auto_makena_8295: j5wed7m65 +swin_base_TFLITE-cs_xr_8450: jg9l3m9lg +swin_base_QNN-cs_8_gen_2: jp14djq2p +swin_base_QNN-cs_8_gen_3: jgdxr37ep +swin_base_QNN-cs_8_elite: j57yj4vl5 
+swin_base_QNN-cs_8550: jp4lx1jv5 +swin_base_QNN-cs_x_elite: jpxk74e15 +swin_base_QNN-cs_auto_lemans_8255: j5mnwmvwp +swin_base_QNN-cs_auto_lemans_8775: jgn69nrr5 +swin_base_QNN-cs_auto_lemans_8650: jprv4019g +swin_base_QNN-cs_auto_makena_8295: jp2k7w34p +swin_base_QNN-cs_xr_8450: jpy14xv7p +swin_base_ONNX-cs_8_gen_2: jp0z1je65 +swin_base_ONNX-cs_8_gen_3: jp8q3xwxp +swin_base_ONNX-cs_8_elite: jgkel4r2g +swin_base_ONNX-cs_x_elite: j5q67y94p +swin_base_ONNX_DML_GPU-cs_x_elite: jglv0xe85 +swin_base_ONNX_DML_NPU-cs_x_elite: j56y37q0p +squeezenet1_1_TFLITE-cs_8_gen_2: jpedrz415 +squeezenet1_1_TFLITE-cs_8_gen_3: jgz3xmvk5 +squeezenet1_1_TFLITE-cs_8_elite: j5wed7x65 +squeezenet1_1_TFLITE-cs_8550: jg9l3m8lg +squeezenet1_1_TFLITE-cs_auto_lemans_8255: jp14dj32p +squeezenet1_1_TFLITE-cs_auto_lemans_8775: jgdxr30ep +squeezenet1_1_TFLITE-cs_auto_lemans_8650: j57yj46l5 +squeezenet1_1_TFLITE-cs_auto_makena_8295: jp4lx18v5 +squeezenet1_1_TFLITE-cs_xr_8450: jpxk74m15 +squeezenet1_1_QNN-cs_8_gen_2: j5mnwm4wp +squeezenet1_1_QNN-cs_8_gen_3: jgn69nxr5 +squeezenet1_1_QNN-cs_8_elite: jprv4099g +squeezenet1_1_QNN-cs_8550: jp2k7wj4p +squeezenet1_1_QNN-cs_x_elite: jpy14xn7p +squeezenet1_1_QNN-cs_auto_lemans_8255: jp0z1jk65 +squeezenet1_1_QNN-cs_auto_lemans_8775: jp8q3x8xp +squeezenet1_1_QNN-cs_auto_lemans_8650: jgkel4d2g +squeezenet1_1_QNN-cs_auto_makena_8295: j5q67yw4p +squeezenet1_1_QNN-cs_xr_8450: jglv0x785 +squeezenet1_1_ONNX-cs_8_gen_2: j56y37v0p +squeezenet1_1_ONNX-cs_8_gen_3: jp3j498lg +squeezenet1_1_ONNX-cs_8_elite: jgo21rmxp +squeezenet1_1_ONNX-cs_x_elite: jpv61d4j5 +squeezenet1_1_ONNX_DML_GPU-cs_x_elite: jgjv071xg +squeezenet1_1_ONNX_DML_NPU-cs_x_elite: jpedrz215 +sinet_TFLITE-cs_8_gen_2: jp14dj72p +sinet_TFLITE-cs_8_gen_3: jgdxr38ep +sinet_TFLITE-cs_8_elite: j57yj4kl5 +sinet_TFLITE-cs_8550: jp4lx1mv5 +sinet_TFLITE-cs_auto_lemans_8255: jpxk74315 +sinet_TFLITE-cs_auto_lemans_8775: j5mnwmowp +sinet_TFLITE-cs_auto_lemans_8650: jgn69nor5 +sinet_TFLITE-cs_auto_makena_8295: jprv40o9g 
+sinet_TFLITE-cs_xr_8450: jp2k7w44p +sinet_QNN-cs_8_gen_2: jpy14xq7p +sinet_QNN-cs_8_gen_3: jp0z1jd65 +sinet_QNN-cs_8_elite: jp8q3x6xp +sinet_QNN-cs_8550: jgkel4o2g +sinet_QNN-cs_x_elite: j5q67yz4p +sinet_QNN-cs_auto_lemans_8255: jglv0xo85 +sinet_QNN-cs_auto_lemans_8775: j56y37r0p +sinet_QNN-cs_auto_lemans_8650: jp3j49xlg +sinet_QNN-cs_auto_makena_8295: jgo21roxp +sinet_QNN-cs_xr_8450: jpv61dej5 +sinet_ONNX-cs_8_gen_2: jgjv07oxg +sinet_ONNX-cs_8_gen_3: jpedrz815 +sinet_ONNX-cs_8_elite: jgz3xm8k5 +sinet_ONNX-cs_x_elite: j5wed7165 +sinet_ONNX_DML_GPU-cs_x_elite: jg9l3mxlg +sinet_ONNX_DML_NPU-cs_x_elite: jp14djv2p +shufflenet_v2_TFLITE-cs_8_gen_2: jp14djv8p +shufflenet_v2_TFLITE-cs_8_gen_3: jgdxr3zrp +shufflenet_v2_TFLITE-cs_8_elite: j57yj47v5 +shufflenet_v2_TFLITE-cs_8550: jp4lx1985 +shufflenet_v2_TFLITE-cs_auto_lemans_8255: jpxk74d35 +shufflenet_v2_TFLITE-cs_auto_lemans_8775: j5mnwmddp +shufflenet_v2_TFLITE-cs_auto_lemans_8650: jgn69n7k5 +shufflenet_v2_TFLITE-cs_auto_makena_8295: jprv40n0g +shufflenet_v2_TFLITE-cs_xr_8450: jp2k7wvrp +shufflenet_v2_QNN-cs_8_gen_2: jpy14x78p +shufflenet_v2_QNN-cs_8_gen_3: jp0z1jv95 +shufflenet_v2_QNN-cs_8_elite: jp8q3x4kp +shufflenet_v2_QNN-cs_8550: jgkel49wg +shufflenet_v2_QNN-cs_x_elite: j5q67ymnp +shufflenet_v2_QNN-cs_auto_lemans_8255: jglv0x1j5 +shufflenet_v2_QNN-cs_auto_lemans_8775: j56y37d6p +shufflenet_v2_QNN-cs_auto_lemans_8650: jp3j49w3g +shufflenet_v2_QNN-cs_auto_makena_8295: jgo21r4qp +shufflenet_v2_QNN-cs_xr_8450: jpv61d7k5 +shufflenet_v2_ONNX-cs_8_gen_2: jgjv07qvg +shufflenet_v2_ONNX-cs_8_gen_3: jpedrzyo5 +shufflenet_v2_ONNX-cs_8_elite: jgz3xmno5 +shufflenet_v2_ONNX-cs_x_elite: j5wed7435 +shufflenet_v2_ONNX_DML_GPU-cs_x_elite: jg9l3mdwg +shufflenet_v2_ONNX_DML_NPU-cs_x_elite: jp14dj68p +sesr_m5_TFLITE-cs_8_gen_2: jpxk74x35 +sesr_m5_TFLITE-cs_8_gen_3: j5mnwm8dp +sesr_m5_TFLITE-cs_8_elite: jgn69nkk5 +sesr_m5_TFLITE-cs_8550: jprv40w0g +sesr_m5_TFLITE-cs_auto_lemans_8255: jp2k7werp +sesr_m5_TFLITE-cs_auto_lemans_8775: 
jpy14xm8p +sesr_m5_TFLITE-cs_auto_lemans_8650: jp0z1j695 +sesr_m5_TFLITE-cs_auto_makena_8295: jp8q3x1kp +sesr_m5_TFLITE-cs_xr_8450: jgkel48wg +sesr_m5_QNN-cs_8_gen_2: j5q67yvnp +sesr_m5_QNN-cs_8_gen_3: jglv0xlj5 +sesr_m5_QNN-cs_8_elite: j56y37w6p +sesr_m5_QNN-cs_8550: jp3j4963g +sesr_m5_QNN-cs_x_elite: jgo21r8qp +sesr_m5_QNN-cs_auto_lemans_8255: jpv61ddk5 +sesr_m5_QNN-cs_auto_lemans_8775: jgjv077vg +sesr_m5_QNN-cs_auto_lemans_8650: jpedrzzo5 +sesr_m5_QNN-cs_auto_makena_8295: jgz3xmmo5 +sesr_m5_QNN-cs_xr_8450: j5wed7735 +sesr_m5_ONNX-cs_8_gen_2: jg9l3mmwg +sesr_m5_ONNX-cs_8_gen_3: jp14djj8p +sesr_m5_ONNX-cs_8_elite: jgdxr33rp +sesr_m5_ONNX-cs_x_elite: j57yj44v5 +sesr_m5_ONNX_DML_GPU-cs_x_elite: jp4lx1185 +sesr_m5_ONNX_DML_NPU-cs_x_elite: jpxk74435 +sam_TFLITE-cs_8_gen_2_SAMDecoder: jpy14xx8p +sam_TFLITE-cs_8_gen_2_SAMEncoder: jp0z1jj95 +sam_TFLITE-cs_8_gen_3_SAMDecoder: jp8q3xxkp +sam_TFLITE-cs_8_gen_3_SAMEncoder: jgkel44wg +sam_TFLITE-cs_8_elite_SAMDecoder: j5q67yynp +sam_TFLITE-cs_8_elite_SAMEncoder: jglv0xxj5 +sam_TFLITE-cs_8550_SAMDecoder: j56y3776p +sam_TFLITE-cs_8550_SAMEncoder: jp3j4993g +sam_TFLITE-cs_auto_lemans_8255_SAMDecoder: jgo21rrqp +sam_TFLITE-cs_auto_lemans_8255_SAMEncoder: j5mnwj9wp +sam_TFLITE-cs_auto_lemans_8775_SAMDecoder: jgjv07rvg +sam_TFLITE-cs_auto_lemans_8775_SAMEncoder: jpedrz7o5 +sam_TFLITE-cs_auto_lemans_8650_SAMDecoder: jgz3xmlo5 +sam_TFLITE-cs_auto_lemans_8650_SAMEncoder: j5wed7l35 +sam_TFLITE-cs_auto_makena_8295_SAMDecoder: jg9l3mzwg +sam_TFLITE-cs_auto_makena_8295_SAMEncoder: jp14djn8p +sam_TFLITE-cs_xr_8450_SAMDecoder: jgdxr3drp +sam_TFLITE-cs_xr_8450_SAMEncoder: j57yj4ev5 +sam_QNN-cs_8_gen_2_SAMDecoder: jp4lx1y85 +sam_QNN-cs_8_gen_2_SAMEncoder: jpxk74l35 +sam_QNN-cs_8_gen_3_SAMDecoder: j5mnwm0dp +sam_QNN-cs_8_gen_3_SAMEncoder: jgn69nzk5 +sam_QNN-cs_8_elite_SAMDecoder: jprv40l0g +sam_QNN-cs_8_elite_SAMEncoder: jp2k7wrrp +sam_QNN-cs_xr_8450_SAMDecoder: jpy14xo8p +sam_QNN-cs_xr_8450_SAMEncoder: jp0z1jm95 
+sam_ONNX-cs_8_gen_2_SAMDecoder: jgkel42wg +sam_ONNX-cs_8_gen_2_SAMEncoder: j5q67ylnp +sam_ONNX-cs_8_gen_3_SAMDecoder: jglv0xyj5 +sam_ONNX-cs_8_gen_3_SAMEncoder: jp3j49z3g +sam_ONNX-cs_8_elite_SAMDecoder: jgo21rlqp +sam_ONNX-cs_8_elite_SAMEncoder: jpv61d2k5 +sam_ONNX-cs_x_elite_SAMDecoder: jgjv073vg +sam_ONNX-cs_x_elite_SAMEncoder: jpedrz6o5 +sam_ONNX_DML_GPU-cs_x_elite_SAMDecoder: jgz3xmzo5 +sam_ONNX_DML_GPU-cs_x_elite_SAMEncoder: j5wed7y35 +sam_ONNX_DML_NPU-cs_x_elite_SAMDecoder: jg9l3mowg +sam_ONNX_DML_NPU-cs_x_elite_SAMEncoder: jp14djo8p +resnext50_TFLITE-cs_8_gen_2: jg9l3mo8g +resnext50_TFLITE-cs_8_gen_3: jp14djo7p +resnext50_TFLITE-cs_8_elite: jgdxr36zp +resnext50_TFLITE-cs_8550: j57yj4o95 +resnext50_TFLITE-cs_auto_lemans_8255: jp4lx1e15 +resnext50_TFLITE-cs_auto_lemans_8775: jpxk740l5 +resnext50_TFLITE-cs_auto_lemans_8650: j5mnwm99p +resnext50_TFLITE-cs_auto_makena_8295: jgn69n1q5 +resnext50_TFLITE-cs_xr_8450: jprv40x7g +resnext50_QNN-cs_8_gen_2: jp2k7woqp +resnext50_QNN-cs_8_gen_3: jpy14x8lp +resnext50_QNN-cs_8_elite: jp0z1jon5 +resnext50_QNN-cs_8550: jp8q3xjop +resnext50_QNN-cs_x_elite: jgkel46ng +resnext50_QNN-cs_auto_lemans_8255: j5q67y4op +resnext50_QNN-cs_auto_lemans_8775: jglv0x8m5 +resnext50_QNN-cs_auto_lemans_8650: j56y37myp +resnext50_QNN-cs_auto_makena_8295: jp3j497ng +resnext50_QNN-cs_xr_8450: jgo21rwkp +resnext50_ONNX-cs_8_gen_2: jpv61dmr5 +resnext50_ONNX-cs_8_gen_3: jgjv07yeg +resnext50_ONNX-cs_8_elite: jpedrzxv5 +resnext50_ONNX-cs_x_elite: jgz3xmyx5 +resnext50_ONNX_DML_GPU-cs_x_elite: j5wed7zm5 +resnext50_ONNX_DML_NPU-cs_x_elite: jg9l3m28g +resnext101_TFLITE-cs_8_gen_2: jpxk74rl5 +resnext101_TFLITE-cs_8_gen_3: jgn69nqq5 +resnext101_TFLITE-cs_8_elite: jprv40d7g +resnext101_TFLITE-cs_8550: jp2k7wdqp +resnext101_TFLITE-cs_auto_lemans_8255: jpy14x2lp +resnext101_TFLITE-cs_auto_lemans_8775: jp0z1j9n5 +resnext101_TFLITE-cs_auto_lemans_8650: jp8q3xrop +resnext101_TFLITE-cs_auto_makena_8295: jgkel40ng +resnext101_TFLITE-cs_xr_8450: j5q67y1op 
+resnext101_QNN-cs_8_gen_2: jglv0xqm5 +resnext101_QNN-cs_8_gen_3: j56y370yp +resnext101_QNN-cs_8_elite: jgo21r9kp +resnext101_QNN-cs_8550: jpv61dnr5 +resnext101_QNN-cs_x_elite: jgjv078eg +resnext101_QNN-cs_auto_lemans_8255: jpedrznv5 +resnext101_QNN-cs_auto_lemans_8775: jgz3xm0x5 +resnext101_QNN-cs_auto_lemans_8650: j5wed7rm5 +resnext101_QNN-cs_auto_makena_8295: jg9l3mq8g +resnext101_QNN-cs_xr_8450: jp14djm7p +resnext101_ONNX-cs_8_gen_2: jgdxr3mzp +resnext101_ONNX-cs_8_gen_3: j57yj4895 +resnext101_ONNX-cs_8_elite: jp4lx1215 +resnext101_ONNX-cs_x_elite: jpxk74zl5 +resnext101_ONNX_DML_GPU-cs_x_elite: j5mnwml9p +resnext101_ONNX_DML_NPU-cs_x_elite: jgn69nwq5 +resnet50_TFLITE-cs_8_gen_2: jp0z1jxn5 +resnet50_TFLITE-cs_8_gen_3: jp8q3xkop +resnet50_TFLITE-cs_8_elite: jgkel4kng +resnet50_TFLITE-cs_8550: j5q67ydop +resnet50_TFLITE-cs_auto_lemans_8255: jglv0x9m5 +resnet50_TFLITE-cs_auto_lemans_8775: j56y379yp +resnet50_TFLITE-cs_auto_lemans_8650: jp3j49lng +resnet50_TFLITE-cs_auto_makena_8295: jgo21r7kp +resnet50_TFLITE-cs_xr_8450: jpv61dyr5 +resnet50_QNN-cs_8_gen_2: jgjv076eg +resnet50_QNN-cs_8_gen_3: jpedrz0v5 +resnet50_QNN-cs_8_elite: jgz3xmqx5 +resnet50_QNN-cs_8550: j5wed70m5 +resnet50_QNN-cs_x_elite: jg9l3m78g +resnet50_QNN-cs_auto_lemans_8255: jp14djk7p +resnet50_QNN-cs_auto_lemans_8775: jgdxr3yzp +resnet50_QNN-cs_auto_lemans_8650: j57yj4195 +resnet50_QNN-cs_auto_makena_8295: jp4lx1615 +resnet50_QNN-cs_xr_8450: jpxk748l5 +resnet50_ONNX-cs_8_gen_2: j5mnwm19p +resnet50_ONNX-cs_8_gen_3: jgn69ndq5 +resnet50_ONNX-cs_8_elite: jprv40m7g +resnet50_ONNX-cs_x_elite: jp2k7wqqp +resnet50_ONNX_DML_GPU-cs_x_elite: jpy14xklp +resnet50_ONNX_DML_NPU-cs_x_elite: jp0z1j8n5 +resnet18_TFLITE-cs_8_gen_2: jglv0xdm5 +resnet18_TFLITE-cs_8_gen_3: j56y37xyp +resnet18_TFLITE-cs_8_elite: jp3j49dng +resnet18_TFLITE-cs_8550: jgo21rxkp +resnet18_TFLITE-cs_auto_lemans_8255: jpv61d8r5 +resnet18_TFLITE-cs_auto_lemans_8775: jgjv079eg +resnet18_TFLITE-cs_auto_lemans_8650: jpedrzqv5 
+resnet18_TFLITE-cs_auto_makena_8295: jgz3xm6x5 +resnet18_TFLITE-cs_xr_8450: j5wed7km5 +resnet18_QNN-cs_8_gen_2: jg9l3mr8g +resnet18_QNN-cs_8_gen_3: jp14dj97p +resnet18_QNN-cs_8_elite: jgdxr3kzp +resnet18_QNN-cs_8550: j5wed7k45 +resnet18_QNN-cs_x_elite: jg9l3mrmg +resnet18_QNN-cs_auto_lemans_8255: jp14dj9np +resnet18_QNN-cs_auto_lemans_8775: jgdxr3k6p +resnet18_QNN-cs_auto_lemans_8650: j57yj4mn5 +resnet18_QNN-cs_auto_makena_8295: jp4lx1725 +resnet18_QNN-cs_xr_8450: jpxk74q85 +resnet18_ONNX-cs_8_gen_2: j5mnwm77p +resnet18_ONNX-cs_8_gen_3: jgn69n4j5 +resnet18_ONNX-cs_8_elite: jprv40rkg +resnet18_ONNX-cs_x_elite: jp2k7w16p +resnet18_ONNX_DML_GPU-cs_x_elite: jpy14xl0p +resnet18_ONNX_DML_NPU-cs_x_elite: jp0z1jn05 +resnet101_TFLITE-cs_8_gen_2: jglv0xj25 +resnet101_TFLITE-cs_8_gen_3: j56y37knp +resnet101_TFLITE-cs_8_elite: jp3j49ymg +resnet101_TFLITE-cs_8550: jgo21rj1p +resnet101_TFLITE-cs_auto_lemans_8255: jpv61djz5 +resnet101_TFLITE-cs_auto_lemans_8775: jgjv07j1g +resnet101_TFLITE-cs_auto_lemans_8650: jpedrzj85 +resnet101_TFLITE-cs_auto_makena_8295: jgz3xm145 +resnet101_TFLITE-cs_xr_8450: j5wed7j45 +resnet101_QNN-cs_8_gen_2: jg9l3m6mg +resnet101_QNN-cs_8_gen_3: jp14djrnp +resnet101_QNN-cs_8_elite: jp4lxzev5 +resnet101_QNN-cs_8550: j57yj4qn5 +resnet101_QNN-cs_x_elite: jp4lx1z25 +resnet101_QNN-cs_auto_lemans_8255: jpxk74w85 +resnet101_QNN-cs_auto_lemans_8775: j5mnwmj7p +resnet101_QNN-cs_auto_lemans_8650: jgn69njj5 +resnet101_QNN-cs_auto_makena_8295: jprv40zkg +resnet101_QNN-cs_xr_8450: jp2k7w26p +resnet101_ONNX-cs_8_gen_2: jpy14x90p +resnet101_ONNX-cs_8_gen_3: jp0z1jy05 +resnet101_ONNX-cs_8_elite: jp8q3xoqp +resnet101_ONNX-cs_x_elite: jgkel4zvg +resnet101_ONNX_DML_GPU-cs_x_elite: j5q67y8ep +resnet101_ONNX_DML_NPU-cs_x_elite: jglv0xn25 +regnet_TFLITE-cs_8_gen_2: jpv61d3z5 +regnet_TFLITE-cs_8_gen_3: jgjv07x1g +regnet_TFLITE-cs_8_elite: jpedrz985 +regnet_TFLITE-cs_8550: jgz3xme45 +regnet_TFLITE-cs_auto_lemans_8255: j5wed7o45 +regnet_TFLITE-cs_auto_lemans_8775: jg9l3mvmg 
+regnet_TFLITE-cs_auto_lemans_8650: jp14dj0np +regnet_TFLITE-cs_auto_makena_8295: jgdxr3w6p +regnet_TFLITE-cs_xr_8450: j57yj4zn5 +regnet_QNN-cs_8_gen_2: jp4lx1q25 +regnet_QNN-cs_8_gen_3: jpxk74v85 +regnet_QNN-cs_8_elite: j5mnwmr7p +regnet_QNN-cs_8550: jgn69n2j5 +regnet_QNN-cs_x_elite: jprv40kkg +regnet_QNN-cs_auto_lemans_8255: jp2k7w86p +regnet_QNN-cs_auto_lemans_8775: jpy14xe0p +regnet_QNN-cs_auto_lemans_8650: jp0z1jq05 +regnet_QNN-cs_auto_makena_8295: jp8q3x9qp +regnet_QNN-cs_xr_8450: jgkel4nvg +regnet_ONNX-cs_8_gen_2: j5q67ykep +regnet_ONNX-cs_8_gen_3: jglv0xz25 +regnet_ONNX-cs_8_elite: j56y37jnp +regnet_ONNX-cs_x_elite: jp3j493mg +regnet_ONNX_DML_GPU-cs_x_elite: jgo21r01p +regnet_ONNX_DML_NPU-cs_x_elite: jpv61doz5 +real_esrgan_x4plus_TFLITE-cs_8_gen_2: j5wed7v45 +real_esrgan_x4plus_TFLITE-cs_8_gen_3: jg9l3m1mg +real_esrgan_x4plus_TFLITE-cs_8_elite: jp14djlnp +real_esrgan_x4plus_TFLITE-cs_8550: jgdxr396p +real_esrgan_x4plus_TFLITE-cs_auto_lemans_8255: j57yj4wn5 +real_esrgan_x4plus_TFLITE-cs_auto_lemans_8775: jp4lx1o25 +real_esrgan_x4plus_TFLITE-cs_auto_lemans_8650: jpxk74j85 +real_esrgan_x4plus_TFLITE-cs_auto_makena_8295: j5mnwm27p +real_esrgan_x4plus_TFLITE-cs_xr_8450: jgn69nyj5 +real_esrgan_x4plus_QNN-cs_8_gen_2: jprv40qkg +real_esrgan_x4plus_QNN-cs_8_gen_3: jp2k7w66p +real_esrgan_x4plus_QNN-cs_8_elite: jpy14xw0p +real_esrgan_x4plus_QNN-cs_8550: jp0z1j705 +real_esrgan_x4plus_QNN-cs_x_elite: jp8q3xvqp +real_esrgan_x4plus_QNN-cs_auto_lemans_8255: jgkel4mvg +real_esrgan_x4plus_QNN-cs_auto_lemans_8775: j5q67yoep +real_esrgan_x4plus_QNN-cs_auto_lemans_8650: jglv0xr25 +real_esrgan_x4plus_QNN-cs_auto_makena_8295: j56y37lnp +real_esrgan_x4plus_QNN-cs_xr_8450: jp3j492mg +real_esrgan_x4plus_ONNX-cs_8_gen_2: jgo21rq1p +real_esrgan_x4plus_ONNX-cs_8_gen_3: jpv61dxz5 +real_esrgan_x4plus_ONNX-cs_8_elite: jgjv0741g +real_esrgan_x4plus_ONNX-cs_x_elite: jpedrz385 +real_esrgan_x4plus_ONNX_DML_GPU-cs_x_elite: jgz3xmk45 +real_esrgan_x4plus_ONNX_DML_NPU-cs_x_elite: j5wed7n45 
+real_esrgan_general_x4v3_TFLITE-cs_8_gen_2: j5wed7nz5 +real_esrgan_general_x4v3_TFLITE-cs_8_gen_3: jg9l3meqg +real_esrgan_general_x4v3_TFLITE-cs_8_elite: jp14djxkp +real_esrgan_general_x4v3_TFLITE-cs_8550: jgdxr3lkp +real_esrgan_general_x4v3_TFLITE-cs_auto_lemans_8255: j57yj43q5 +real_esrgan_general_x4v3_TFLITE-cs_auto_lemans_8775: jp4lx10q5 +real_esrgan_general_x4v3_TFLITE-cs_auto_lemans_8650: jpxk742j5 +real_esrgan_general_x4v3_TFLITE-cs_auto_makena_8295: j5mnwmyyp +real_esrgan_general_x4v3_TFLITE-cs_xr_8450: jgn69zvv5 +real_esrgan_general_x4v3_QNN-cs_8_gen_2: jprv4l3vg +real_esrgan_general_x4v3_QNN-cs_8_gen_3: jp2k7ryxp +real_esrgan_general_x4v3_QNN-cs_8_elite: jpy14o3rp +real_esrgan_general_x4v3_QNN-cs_8550: jp0z1m025 +real_esrgan_general_x4v3_QNN-cs_x_elite: jp8q3eyzp +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8255: jgkel2xyg +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8775: j5q67lq7p +real_esrgan_general_x4v3_QNN-cs_auto_lemans_8650: jglv0yme5 +real_esrgan_general_x4v3_QNN-cs_auto_makena_8295: j56y384vp +real_esrgan_general_x4v3_QNN-cs_xr_8450: jgo21l64p +real_esrgan_general_x4v3_ONNX-cs_8_gen_2: jpv61lk75 +real_esrgan_general_x4v3_ONNX-cs_8_gen_3: jgjv0rn7g +real_esrgan_general_x4v3_ONNX-cs_8_elite: jpedr7m75 +real_esrgan_general_x4v3_ONNX-cs_x_elite: jgz3xldz5 +real_esrgan_general_x4v3_ONNX_DML_GPU-cs_x_elite: j5wedl6z5 +real_esrgan_general_x4v3_ONNX_DML_NPU-cs_x_elite: jg9l3znqg +quicksrnetsmall_TFLITE-cs_8_gen_2: jp4lxyrq5 +quicksrnetsmall_TFLITE-cs_8_gen_3: jpxk7loj5 +quicksrnetsmall_TFLITE-cs_8_elite: j5mnw0xyp +quicksrnetsmall_TFLITE-cs_8550: jgn69z6v5 +quicksrnetsmall_TFLITE-cs_auto_lemans_8255: jprv4lvvg +quicksrnetsmall_TFLITE-cs_auto_lemans_8775: jp2k7rkxp +quicksrnetsmall_TFLITE-cs_auto_lemans_8650: jpy14o1rp +quicksrnetsmall_TFLITE-cs_auto_makena_8295: jp0z1mz25 +quicksrnetsmall_TFLITE-cs_xr_8450: jp8q3eqzp +quicksrnetsmall_QNN-cs_8_gen_2: jgkel2eyg +quicksrnetsmall_QNN-cs_8_gen_3: j5q67l67p +quicksrnetsmall_QNN-cs_8_elite: jglv0yve5 
+quicksrnetsmall_QNN-cs_8550: j56y38yvp +quicksrnetsmall_QNN-cs_x_elite: jp3j4zjxg +quicksrnetsmall_QNN-cs_auto_lemans_8255: jgo21l24p +quicksrnetsmall_QNN-cs_auto_lemans_8775: jpv61l675 +quicksrnetsmall_QNN-cs_auto_lemans_8650: jgjv0rv7g +quicksrnetsmall_QNN-cs_auto_makena_8295: jpedr7d75 +quicksrnetsmall_QNN-cs_xr_8450: jgz3xl3z5 +quicksrnetsmall_ONNX-cs_8_gen_2: j5wedlez5 +quicksrnetsmall_ONNX-cs_8_gen_3: jg9l3zlqg +quicksrnetsmall_ONNX-cs_8_elite: jp14dn4kp +quicksrnetsmall_ONNX-cs_x_elite: jgdxrdxkp +quicksrnetsmall_ONNX_DML_GPU-cs_x_elite: j57yjeyq5 +quicksrnetsmall_ONNX_DML_NPU-cs_x_elite: jp4lxylq5 +quicksrnetmedium_TFLITE-cs_8_gen_2: jprv4l6vg +quicksrnetmedium_TFLITE-cs_8_gen_3: jp2k7rxxp +quicksrnetmedium_TFLITE-cs_8_elite: jpy14ozrp +quicksrnetmedium_TFLITE-cs_8550: jp0z1m425 +quicksrnetmedium_TFLITE-cs_auto_lemans_8255: jp8q3e2zp +quicksrnetmedium_TFLITE-cs_auto_lemans_8775: jgkel2vyg +quicksrnetmedium_TFLITE-cs_auto_lemans_8650: j5q67l07p +quicksrnetmedium_TFLITE-cs_auto_makena_8295: jglv0y4e5 +quicksrnetmedium_TFLITE-cs_xr_8450: j56y382vp +quicksrnetmedium_QNN-cs_8_gen_2: jp3j4znxg +quicksrnetmedium_QNN-cs_8_gen_3: jgo21lz4p +quicksrnetmedium_QNN-cs_8_elite: jpv61lq75 +quicksrnetmedium_QNN-cs_8550: jgjv0rd7g +quicksrnetmedium_QNN-cs_x_elite: jpedr7o75 +quicksrnetmedium_QNN-cs_auto_lemans_8255: jgz3xl2z5 +quicksrnetmedium_QNN-cs_auto_lemans_8775: j5wedlwz5 +quicksrnetmedium_QNN-cs_auto_lemans_8650: jg9l3z0qg +quicksrnetmedium_QNN-cs_auto_makena_8295: jp14dn2kp +quicksrnetmedium_QNN-cs_xr_8450: jgdxrdnkp +quicksrnetmedium_ONNX-cs_8_gen_2: j57yje2q5 +quicksrnetmedium_ONNX-cs_8_gen_3: jp4lxynq5 +quicksrnetmedium_ONNX-cs_8_elite: jpxk7l9j5 +quicksrnetmedium_ONNX-cs_x_elite: j5mnw0eyp +quicksrnetmedium_ONNX_DML_GPU-cs_x_elite: jgn69zlv5 +quicksrnetmedium_ONNX_DML_NPU-cs_x_elite: jprv4l8vg +quicksrnetlarge_TFLITE-cs_8_gen_2: jp8q3e0zp +quicksrnetlarge_TFLITE-cs_8_gen_3: jgkel27yg +quicksrnetlarge_TFLITE-cs_8_elite: j5q67le7p +quicksrnetlarge_TFLITE-cs_8550: 
jglv0y6e5 +quicksrnetlarge_TFLITE-cs_auto_lemans_8255: j56y38evp +quicksrnetlarge_TFLITE-cs_auto_lemans_8775: jp3j4zvxg +quicksrnetlarge_TFLITE-cs_auto_lemans_8650: jgo21lk4p +quicksrnetlarge_TFLITE-cs_auto_makena_8295: jpv61l075 +quicksrnetlarge_TFLITE-cs_xr_8450: jgjv0rz7g +quicksrnetlarge_QNN-cs_8_gen_2: jpedr7e75 +quicksrnetlarge_QNN-cs_8_gen_3: jgz3xloz5 +quicksrnetlarge_QNN-cs_8_elite: j5wedl2z5 +quicksrnetlarge_QNN-cs_8550: jg9l3zjqg +quicksrnetlarge_QNN-cs_x_elite: jp14dnykp +quicksrnetlarge_QNN-cs_auto_lemans_8255: jgdxrdekp +quicksrnetlarge_QNN-cs_auto_lemans_8775: j5wedl2j5 +quicksrnetlarge_QNN-cs_auto_lemans_8650: jg9l3zjvg +quicksrnetlarge_QNN-cs_auto_makena_8295: jp14dnylp +quicksrnetlarge_QNN-cs_xr_8450: jgdxrdelp +quicksrnetlarge_ONNX-cs_8_gen_2: j57yjelr5 +quicksrnetlarge_ONNX-cs_8_gen_3: jp4lxydl5 +quicksrnetlarge_ONNX-cs_8_elite: jpxk7l695 +quicksrnetlarge_ONNX-cs_x_elite: j5mnw06qp +quicksrnetlarge_ONNX_DML_GPU-cs_x_elite: jgn69zmm5 +quicksrnetlarge_ONNX_DML_NPU-cs_x_elite: jprv4l2eg +posenet_mobilenet_quantized_TFLITE-cs_8_gen_2: jp8q3em8p +posenet_mobilenet_quantized_TFLITE-cs_8_gen_3: jgkel2qog +posenet_mobilenet_quantized_TFLITE-cs_8_elite: j5q67lrmp +posenet_mobilenet_quantized_TFLITE-cs_6490: jglv0y2l5 +posenet_mobilenet_quantized_TFLITE-cs_8250: j56y38z7p +posenet_mobilenet_quantized_TFLITE-cs_8550: jp3j4z1zg +posenet_mobilenet_quantized_TFLITE-cs_auto_lemans_8255: jgo21lndp +posenet_mobilenet_quantized_TFLITE-cs_auto_lemans_8775: jpv61lrm5 +posenet_mobilenet_quantized_TFLITE-cs_auto_lemans_8650: jgjv0r28g +posenet_mobilenet_quantized_TFLITE-cs_auto_makena_8295: j57yjqol5 +posenet_mobilenet_quantized_TFLITE-cs_xr_8450: jgz3xlj65 +posenet_mobilenet_quantized_QNN-cs_8_gen_2: j5wedl3j5 +posenet_mobilenet_quantized_QNN-cs_8_gen_3: jg9l3zyvg +posenet_mobilenet_quantized_QNN-cs_8_elite: jp14dnwlp +posenet_mobilenet_quantized_QNN-cs_6490: jgdxrdqlp +posenet_mobilenet_quantized_QNN-cs_8550: j57yjexr5 +posenet_mobilenet_quantized_QNN-cs_x_elite: 
jp4lxyvl5 +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8255: jpxk7ly95 +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8775: j5mnw03qp +posenet_mobilenet_quantized_QNN-cs_auto_lemans_8650: jgn69z3m5 +posenet_mobilenet_quantized_QNN-cs_auto_makena_8295: jprv4leeg +posenet_mobilenet_quantized_QNN-cs_xr_8450: jp2k7rlmp +posenet_mobilenet_quantized_ONNX-cs_8_gen_2: jpy14o64p +posenet_mobilenet_quantized_ONNX-cs_8_gen_3: jp0z1mle5 +posenet_mobilenet_quantized_ONNX-cs_8_elite: jp8q3ez8p +posenet_mobilenet_quantized_ONNX-cs_x_elite: jgkel23og +posenet_mobilenet_quantized_ONNX_DML_NPU-cs_x_elite: j5q67l3mp +posenet_mobilenet_TFLITE-cs_8_gen_2: jp3j4zezg +posenet_mobilenet_TFLITE-cs_8_gen_3: jgo21l3dp +posenet_mobilenet_TFLITE-cs_8_elite: jpv61lvm5 +posenet_mobilenet_TFLITE-cs_8550: jgjv0re8g +posenet_mobilenet_TFLITE-cs_auto_lemans_8255: jpedr7k05 +posenet_mobilenet_TFLITE-cs_auto_lemans_8775: jgz3xlr65 +posenet_mobilenet_TFLITE-cs_auto_lemans_8650: j5wedlqj5 +posenet_mobilenet_TFLITE-cs_auto_makena_8295: jg9l3zwvg +posenet_mobilenet_TFLITE-cs_xr_8450: jp14dnelp +posenet_mobilenet_QNN-cs_8_gen_2: jgdxrdolp +posenet_mobilenet_QNN-cs_8_gen_3: j57yjedr5 +posenet_mobilenet_QNN-cs_8_elite: jp4lxywl5 +posenet_mobilenet_QNN-cs_8550: jpxk7l195 +posenet_mobilenet_QNN-cs_x_elite: j5mnw0zqp +posenet_mobilenet_QNN-cs_auto_lemans_8255: jgn69zem5 +posenet_mobilenet_QNN-cs_auto_lemans_8775: jprv4lyeg +posenet_mobilenet_QNN-cs_auto_lemans_8650: jp2k7rmmp +posenet_mobilenet_QNN-cs_auto_makena_8295: jpy14od4p +posenet_mobilenet_QNN-cs_xr_8450: jp0z1mre5 +posenet_mobilenet_ONNX-cs_8_gen_2: jp8q3e78p +posenet_mobilenet_ONNX-cs_8_gen_3: jgkel2yog +posenet_mobilenet_ONNX-cs_8_elite: j5q67l2mp +posenet_mobilenet_ONNX-cs_x_elite: jglv0ykl5 +posenet_mobilenet_ONNX_DML_GPU-cs_x_elite: j56y3817p +posenet_mobilenet_ONNX_DML_NPU-cs_x_elite: jp3j4zmzg +openpose_TFLITE-cs_8_gen_2: jpedr7v05 +openpose_TFLITE-cs_8_gen_3: jgz3xl765 +openpose_TFLITE-cs_8_elite: j5wedl9j5 +openpose_TFLITE-cs_8550: 
jg9l3z4vg +openpose_TFLITE-cs_auto_lemans_8255: jp14dn8lp +openpose_TFLITE-cs_auto_lemans_8775: jgdxrdvlp +openpose_TFLITE-cs_auto_lemans_8650: j57yjejr5 +openpose_TFLITE-cs_auto_makena_8295: jp4lxyxl5 +openpose_TFLITE-cs_xr_8450: jpxk7l795 +openpose_QNN-cs_8_gen_2: j5mnw0wqp +openpose_QNN-cs_8_gen_3: jprv4l4eg +openpose_QNN-cs_8_elite: jgdxrj6ep +openpose_QNN-cs_8550: jpy14o44p +openpose_QNN-cs_x_elite: jp0z1m1e5 +openpose_QNN-cs_auto_lemans_8255: jp8q3e38p +openpose_QNN-cs_auto_lemans_8775: jgkel2log +openpose_QNN-cs_auto_lemans_8650: j5q67l7mp +openpose_QNN-cs_auto_makena_8295: jglv0y0l5 +openpose_QNN-cs_xr_8450: j56y3837p +openpose_ONNX-cs_8_gen_2: jp3j4z4zg +openpose_ONNX-cs_8_gen_3: jgo21l1dp +openpose_ONNX-cs_8_elite: jpv61l1m5 +openpose_ONNX-cs_x_elite: jgjv0r08g +openpose_ONNX_DML_GPU-cs_x_elite: jpedr7r05 +openpose_ONNX_DML_NPU-cs_x_elite: jgz3xlx65 +openai_clip_TFLITE-cs_8_gen_2_CLIPTextEncoder: jgdxrdrlp +openai_clip_TFLITE-cs_8_gen_2_CLIPImageEncoder: j5wedlm65 +openai_clip_TFLITE-cs_8_gen_3_CLIPTextEncoder: jg9l3z9lg +openai_clip_TFLITE-cs_8_gen_3_CLIPImageEncoder: jp14dnq2p +openai_clip_TFLITE-cs_8_elite_CLIPTextEncoder: jgdxrd7ep +openai_clip_TFLITE-cs_8_elite_CLIPImageEncoder: j57yjevl5 +openai_clip_TFLITE-cs_8550_CLIPTextEncoder: jp4lxyjv5 +openai_clip_TFLITE-cs_8550_CLIPImageEncoder: jpxk7le15 +openai_clip_TFLITE-cs_auto_lemans_8255_CLIPTextEncoder: j5mnw0vwp +openai_clip_TFLITE-cs_auto_lemans_8255_CLIPImageEncoder: jgn69zrr5 +openai_clip_TFLITE-cs_auto_lemans_8775_CLIPTextEncoder: jprv4l19g +openai_clip_TFLITE-cs_auto_lemans_8775_CLIPImageEncoder: jp2k7r34p +openai_clip_TFLITE-cs_auto_lemans_8650_CLIPTextEncoder: jpy14ov7p +openai_clip_TFLITE-cs_auto_lemans_8650_CLIPImageEncoder: jp0z1me65 +openai_clip_TFLITE-cs_auto_makena_8295_CLIPTextEncoder: jp8q3ewxp +openai_clip_TFLITE-cs_auto_makena_8295_CLIPImageEncoder: jp14dro2p +openai_clip_TFLITE-cs_xr_8450_CLIPTextEncoder: j5q67l94p +openai_clip_TFLITE-cs_xr_8450_CLIPImageEncoder: jglv0ye85 
+openai_clip_QNN-cs_8_gen_2_CLIPTextEncoder: j56y38q0p +openai_clip_QNN-cs_8_gen_2_CLIPImageEncoder: jp3j4zqlg +openai_clip_QNN-cs_8_gen_3_CLIPTextEncoder: jgo21lexp +openai_clip_QNN-cs_8_gen_3_CLIPImageEncoder: jpv61lzj5 +openai_clip_QNN-cs_8_elite_CLIPTextEncoder: jgjv0rkxg +openai_clip_QNN-cs_8_elite_CLIPImageEncoder: jpedr7415 +openai_clip_QNN-cs_8550_CLIPTextEncoder: jgz3xlvk5 +openai_clip_QNN-cs_8550_CLIPImageEncoder: j5wedlx65 +openai_clip_QNN-cs_x_elite_CLIPTextEncoder: jg9l3z8lg +openai_clip_QNN-cs_x_elite_CLIPImageEncoder: jp14dn32p +openai_clip_QNN-cs_auto_lemans_8255_CLIPTextEncoder: jgdxrd0ep +openai_clip_QNN-cs_auto_lemans_8255_CLIPImageEncoder: j57yje6l5 +openai_clip_QNN-cs_auto_lemans_8775_CLIPTextEncoder: jp4lxy8v5 +openai_clip_QNN-cs_auto_lemans_8775_CLIPImageEncoder: jpxk7lm15 +openai_clip_QNN-cs_auto_lemans_8650_CLIPTextEncoder: j5mnw04wp +openai_clip_QNN-cs_auto_lemans_8650_CLIPImageEncoder: jgn69zxr5 +openai_clip_QNN-cs_auto_makena_8295_CLIPTextEncoder: jprv4l99g +openai_clip_QNN-cs_auto_makena_8295_CLIPImageEncoder: jp2k7rj4p +openai_clip_QNN-cs_xr_8450_CLIPTextEncoder: jpy14on7p +openai_clip_QNN-cs_xr_8450_CLIPImageEncoder: jp0z1mk65 +openai_clip_ONNX-cs_8_gen_2_CLIPTextEncoder: jp8q3e8xp +openai_clip_ONNX-cs_8_gen_2_CLIPImageEncoder: jgkel2d2g +openai_clip_ONNX-cs_8_gen_3_CLIPTextEncoder: j5q67lw4p +openai_clip_ONNX-cs_8_gen_3_CLIPImageEncoder: jglv0y785 +openai_clip_ONNX-cs_8_elite_CLIPTextEncoder: j56y38v0p +openai_clip_ONNX-cs_8_elite_CLIPImageEncoder: jp3j4z8lg +openai_clip_ONNX-cs_x_elite_CLIPTextEncoder: jgo21lmxp +openai_clip_ONNX-cs_x_elite_CLIPImageEncoder: jpv61l4j5 +openai_clip_ONNX_DML_GPU-cs_x_elite_CLIPTextEncoder: jgjv0r1xg +openai_clip_ONNX_DML_GPU-cs_x_elite_CLIPImageEncoder: jpedr7215 +openai_clip_ONNX_DML_NPU-cs_x_elite_CLIPTextEncoder: jgz3xlwk5 +openai_clip_ONNX_DML_NPU-cs_x_elite_CLIPImageEncoder: j5wedl865 +mobilenet_v3_small_TFLITE-cs_8_gen_2: j5mnw0owp +mobilenet_v3_small_TFLITE-cs_8_gen_3: jgn69zor5 
+mobilenet_v3_small_TFLITE-cs_8_elite: jprv4lo9g +mobilenet_v3_small_TFLITE-cs_8550: jp2k7r44p +mobilenet_v3_small_TFLITE-cs_auto_lemans_8255: jpy14oq7p +mobilenet_v3_small_TFLITE-cs_auto_lemans_8775: jp0z1md65 +mobilenet_v3_small_TFLITE-cs_auto_lemans_8650: jp8q3e6xp +mobilenet_v3_small_TFLITE-cs_auto_makena_8295: jgkel2o2g +mobilenet_v3_small_TFLITE-cs_xr_8450: j5q67lz4p +mobilenet_v3_small_QNN-cs_8_gen_2: jglv0yo85 +mobilenet_v3_small_QNN-cs_8_gen_3: j56y38r0p +mobilenet_v3_small_QNN-cs_8_elite: jp3j4zxlg +mobilenet_v3_small_QNN-cs_8550: jgo21loxp +mobilenet_v3_small_QNN-cs_x_elite: jpv61lej5 +mobilenet_v3_small_QNN-cs_auto_lemans_8255: jgjv0roxg +mobilenet_v3_small_QNN-cs_auto_lemans_8775: jpedr7815 +mobilenet_v3_small_QNN-cs_auto_lemans_8650: jgz3xl8k5 +mobilenet_v3_small_QNN-cs_auto_makena_8295: j5wedl165 +mobilenet_v3_small_QNN-cs_xr_8450: jg9l3zxlg +mobilenet_v3_small_ONNX-cs_8_gen_2: jp14dnv2p +mobilenet_v3_small_ONNX-cs_8_gen_3: jgdxrdzep +mobilenet_v3_small_ONNX-cs_8_elite: j57yje7l5 +mobilenet_v3_small_ONNX-cs_x_elite: jp4lxy9v5 +mobilenet_v3_small_ONNX_DML_GPU-cs_x_elite: jpxk7ld15 +mobilenet_v3_small_ONNX_DML_NPU-cs_x_elite: j5mnw0dwp +mobilenet_v3_large_TFLITE-cs_8_gen_2: jpy14o77p +mobilenet_v3_large_TFLITE-cs_8_gen_3: jp0z1mv65 +mobilenet_v3_large_TFLITE-cs_8_elite: jp8q3e4xp +mobilenet_v3_large_TFLITE-cs_8550: j5q67lm4p +mobilenet_v3_large_TFLITE-cs_auto_lemans_8255: jglv0y185 +mobilenet_v3_large_TFLITE-cs_auto_lemans_8775: j56y38d0p +mobilenet_v3_large_TFLITE-cs_auto_lemans_8650: jp3j4zwlg +mobilenet_v3_large_TFLITE-cs_auto_makena_8295: jgo21l4xp +mobilenet_v3_large_TFLITE-cs_xr_8450: jpv61l9j5 +mobilenet_v3_large_QNN-cs_8_gen_2: jgjv0rwxg +mobilenet_v3_large_QNN-cs_8_gen_3: jpedr7l15 +mobilenet_v3_large_QNN-cs_8_elite: jgz3xl4k5 +mobilenet_v3_large_QNN-cs_8550: j5wedl465 +mobilenet_v3_large_QNN-cs_x_elite: jg9l3zdlg +mobilenet_v3_large_QNN-cs_auto_lemans_8255: jp14dn62p +mobilenet_v3_large_QNN-cs_auto_lemans_8775: jgdxrd2ep 
+mobilenet_v3_large_QNN-cs_auto_lemans_8650: j5wedl435 +mobilenet_v3_large_QNN-cs_auto_makena_8295: jg9l3zdwg +mobilenet_v3_large_QNN-cs_xr_8450: jp14dn68p +mobilenet_v3_large_ONNX-cs_8_gen_2: jgdxrd2rp +mobilenet_v3_large_ONNX-cs_8_gen_3: j57yje9v5 +mobilenet_v3_large_ONNX-cs_8_elite: jp4lxy385 +mobilenet_v3_large_ONNX-cs_x_elite: jpxk7lx35 +mobilenet_v3_large_ONNX_DML_GPU-cs_x_elite: j5mnw08dp +mobilenet_v3_large_ONNX_DML_NPU-cs_x_elite: jgn69zkk5 +mobilenet_v2_TFLITE-cs_8_gen_2: jp0z1m695 +mobilenet_v2_TFLITE-cs_8_gen_3: jp8q3e1kp +mobilenet_v2_TFLITE-cs_8_elite: jgkel28wg +mobilenet_v2_TFLITE-cs_8550: j5q67lvnp +mobilenet_v2_TFLITE-cs_auto_lemans_8255: jglv0ylj5 +mobilenet_v2_TFLITE-cs_auto_lemans_8775: j56y38w6p +mobilenet_v2_TFLITE-cs_auto_lemans_8650: jp3j4z63g +mobilenet_v2_TFLITE-cs_auto_makena_8295: jgo21l8qp +mobilenet_v2_TFLITE-cs_xr_8450: jpv61ldk5 +mobilenet_v2_QNN-cs_8_gen_2: jgjv0r7vg +mobilenet_v2_QNN-cs_8_gen_3: jpedr7zo5 +mobilenet_v2_QNN-cs_8_elite: jgz3xlmo5 +mobilenet_v2_QNN-cs_8550: j5wedl735 +mobilenet_v2_QNN-cs_x_elite: jg9l3zmwg +mobilenet_v2_QNN-cs_auto_lemans_8255: jp14dnj8p +mobilenet_v2_QNN-cs_auto_lemans_8775: jgdxrd3rp +mobilenet_v2_QNN-cs_auto_lemans_8650: j57yje4v5 +mobilenet_v2_QNN-cs_auto_makena_8295: jp4lxy185 +mobilenet_v2_QNN-cs_xr_8450: jpxk7l435 +mobilenet_v2_ONNX-cs_8_gen_2: j5mnw0mdp +mobilenet_v2_ONNX-cs_8_gen_3: jgn69znk5 +mobilenet_v2_ONNX-cs_8_elite: jprv4l00g +mobilenet_v2_ONNX-cs_x_elite: jp2k7rwrp +mobilenet_v2_ONNX_DML_GPU-cs_x_elite: jpy14ox8p +mobilenet_v2_ONNX_DML_NPU-cs_x_elite: jp0z1mj95 +mnasnet05_TFLITE-cs_8_gen_2: jglv0yxj5 +mnasnet05_TFLITE-cs_8_gen_3: j56y3876p +mnasnet05_TFLITE-cs_8_elite: jp3j4z93g +mnasnet05_TFLITE-cs_8550: jgo21lrqp +mnasnet05_TFLITE-cs_auto_lemans_8255: jpv61llk5 +mnasnet05_TFLITE-cs_auto_lemans_8775: jgjv0rrvg +mnasnet05_TFLITE-cs_auto_lemans_8650: jpedr77o5 +mnasnet05_TFLITE-cs_auto_makena_8295: jgz3xllo5 +mnasnet05_TFLITE-cs_xr_8450: j5wedll35 +mnasnet05_QNN-cs_8_gen_2: jg9l3zzwg 
+mnasnet05_QNN-cs_8_gen_3: jp14dnn8p +mnasnet05_QNN-cs_8_elite: jgdxrddrp +mnasnet05_QNN-cs_8550: j57yjeev5 +mnasnet05_QNN-cs_x_elite: jp4lxyy85 +mnasnet05_QNN-cs_auto_lemans_8255: jpxk7ll35 +mnasnet05_QNN-cs_auto_lemans_8775: j5mnw00dp +mnasnet05_QNN-cs_auto_lemans_8650: jgn69zzk5 +mnasnet05_QNN-cs_auto_makena_8295: jprv4ll0g +mnasnet05_QNN-cs_xr_8450: jp2k7rrrp +mnasnet05_ONNX-cs_8_gen_2: jpy14oo8p +mnasnet05_ONNX-cs_8_gen_3: jp0z1mm95 +mnasnet05_ONNX-cs_8_elite: jp8q3eekp +mnasnet05_ONNX-cs_x_elite: jgkel22wg +mnasnet05_ONNX_DML_GPU-cs_x_elite: j5q67llnp +mnasnet05_ONNX_DML_NPU-cs_x_elite: jglv0yyj5 +midas_quantized_TFLITE-cs_8_gen_2: jgjv0r3vg +midas_quantized_TFLITE-cs_8_gen_3: jpedr76o5 +midas_quantized_TFLITE-cs_8_elite: jgz3xlzo5 +midas_quantized_TFLITE-cs_6490: j5wedly35 +midas_quantized_TFLITE-cs_8250: jg9l3zowg +midas_quantized_TFLITE-cs_8550: jp14dno8p +midas_quantized_TFLITE-cs_auto_lemans_8255: jgdxrd6rp +midas_quantized_TFLITE-cs_auto_lemans_8775: j57yjeov5 +midas_quantized_TFLITE-cs_auto_lemans_8650: jp4lxye85 +midas_quantized_TFLITE-cs_auto_makena_8295: jpxk7l035 +midas_quantized_TFLITE-cs_xr_8450: j5mnw09dp +midas_quantized_QNN-cs_8_gen_2: jgn69z1k5 +midas_quantized_QNN-cs_8_gen_3: jprv4lx0g +midas_quantized_QNN-cs_8_elite: jp2k7rorp +midas_quantized_QNN-cs_6490: jpy14o88p +midas_quantized_QNN-cs_8550: jp0z1mo95 +midas_quantized_QNN-cs_x_elite: jp8q3ejkp +midas_quantized_QNN-cs_auto_lemans_8255: jgkel26wg +midas_quantized_QNN-cs_auto_lemans_8775: j5q67l4np +midas_quantized_QNN-cs_auto_lemans_8650: jglv0ywj5 +midas_quantized_QNN-cs_auto_makena_8295: j56y38o6p +midas_quantized_QNN-cs_xr_8450: jp3j4zo3g +midas_quantized_ONNX-cs_8_gen_2: jgo21ldqp +midas_quantized_ONNX-cs_8_gen_3: jpv61lmk5 +midas_quantized_ONNX-cs_8_elite: jgjv0ryvg +midas_quantized_ONNX-cs_x_elite: jpedr7xo5 +midas_quantized_ONNX_DML_NPU-cs_x_elite: jgz3xlyo5 +midas_TFLITE-cs_8_gen_2: jp14dn18p +midas_TFLITE-cs_8_gen_3: jgdxrd4rp +midas_TFLITE-cs_8_elite: j5wedlzm5 
+midas_TFLITE-cs_8550: jg9l3z28g +midas_TFLITE-cs_auto_lemans_8255: jp14dn17p +midas_TFLITE-cs_auto_lemans_8775: jgdxrd4zp +midas_TFLITE-cs_auto_lemans_8650: j57yjen95 +midas_TFLITE-cs_auto_makena_8295: jp4lxy415 +midas_TFLITE-cs_xr_8450: jpxk7lrl5 +midas_QNN-cs_8_gen_2: j5mnw0k9p +midas_QNN-cs_8_gen_3: jgn69zqq5 +midas_QNN-cs_8_elite: jprv4ld7g +midas_QNN-cs_8550: jp2k7rdqp +midas_QNN-cs_x_elite: jpy14o2lp +midas_QNN-cs_auto_lemans_8255: jp0z1m9n5 +midas_QNN-cs_auto_lemans_8775: jp8q3erop +midas_QNN-cs_auto_lemans_8650: jgkel20ng +midas_QNN-cs_auto_makena_8295: j5q67l1op +midas_QNN-cs_xr_8450: jglv0yqm5 +midas_ONNX-cs_8_gen_2: j56y380yp +midas_ONNX-cs_8_gen_3: jp3j4zrng +midas_ONNX-cs_8_elite: jgo21l9kp +midas_ONNX-cs_x_elite: jpv61lnr5 +midas_ONNX_DML_GPU-cs_x_elite: jgjv0r8eg +midas_ONNX_DML_NPU-cs_x_elite: jpedr7nv5 +mediapipe_selfie_TFLITE-cs_8_gen_2: jp14dnm7p +mediapipe_selfie_TFLITE-cs_8_gen_3: jgdxrdmzp +mediapipe_selfie_TFLITE-cs_8_elite: j57yje895 +mediapipe_selfie_TFLITE-cs_8550: jp4lxy215 +mediapipe_selfie_TFLITE-cs_auto_lemans_8255: jpxk7lzl5 +mediapipe_selfie_TFLITE-cs_auto_lemans_8775: j5mnw0l9p +mediapipe_selfie_TFLITE-cs_auto_lemans_8650: jgn69zwq5 +mediapipe_selfie_TFLITE-cs_auto_makena_8295: jprv4l77g +mediapipe_selfie_TFLITE-cs_xr_8450: jp2k7rzqp +mediapipe_selfie_QNN-cs_8_gen_2: jpy14oylp +mediapipe_selfie_QNN-cs_8_gen_3: jp0z1mxn5 +mediapipe_selfie_QNN-cs_8_elite: jp8q3ekop +mediapipe_selfie_QNN-cs_8550: jgkel2kng +mediapipe_selfie_QNN-cs_x_elite: j5q67ldop +mediapipe_selfie_QNN-cs_auto_lemans_8255: jglv0y9m5 +mediapipe_selfie_QNN-cs_auto_lemans_8775: j56y389yp +mediapipe_selfie_QNN-cs_auto_lemans_8650: jp3j4zlng +mediapipe_selfie_QNN-cs_auto_makena_8295: jgo21l7kp +mediapipe_selfie_QNN-cs_xr_8450: jpv61lyr5 +mediapipe_selfie_ONNX-cs_8_gen_2: jgjv0r6eg +mediapipe_selfie_ONNX-cs_8_gen_3: jpedr70v5 +mediapipe_selfie_ONNX-cs_8_elite: jgz3xlqx5 +mediapipe_selfie_ONNX-cs_x_elite: j5wedl0m5 +mediapipe_selfie_ONNX_DML_GPU-cs_x_elite: jg9l3z78g 
+mediapipe_selfie_ONNX_DML_NPU-cs_x_elite: jp14dnk7p +mediapipe_pose_TFLITE-cs_8_gen_2_MediaPipePoseDetector: jpxk7l8l5 +mediapipe_pose_TFLITE-cs_8_gen_2_MediaPipePoseLandmarkDetector: j5mnw019p +mediapipe_pose_TFLITE-cs_8_gen_3_MediaPipePoseDetector: jgn69zdq5 +mediapipe_pose_TFLITE-cs_8_gen_3_MediaPipePoseLandmarkDetector: jprv4lm7g +mediapipe_pose_TFLITE-cs_8_elite_MediaPipePoseDetector: jp2k7rqqp +mediapipe_pose_TFLITE-cs_8_elite_MediaPipePoseLandmarkDetector: jpy14oklp +mediapipe_pose_TFLITE-cs_8550_MediaPipePoseDetector: jp0z1m8n5 +mediapipe_pose_TFLITE-cs_8550_MediaPipePoseLandmarkDetector: jp8q3edop +mediapipe_pose_TFLITE-cs_auto_lemans_8255_MediaPipePoseDetector: jgkel2wng +mediapipe_pose_TFLITE-cs_auto_lemans_8255_MediaPipePoseLandmarkDetector: j5q67lxop +mediapipe_pose_TFLITE-cs_auto_lemans_8775_MediaPipePoseDetector: jglv0ydm5 +mediapipe_pose_TFLITE-cs_auto_lemans_8775_MediaPipePoseLandmarkDetector: j56y38xyp +mediapipe_pose_TFLITE-cs_auto_lemans_8650_MediaPipePoseDetector: jp3j4zdng +mediapipe_pose_TFLITE-cs_auto_lemans_8650_MediaPipePoseLandmarkDetector: jgo21lxkp +mediapipe_pose_TFLITE-cs_auto_makena_8295_MediaPipePoseDetector: jpv61l8r5 +mediapipe_pose_TFLITE-cs_auto_makena_8295_MediaPipePoseLandmarkDetector: jgjv0r9eg +mediapipe_pose_TFLITE-cs_xr_8450_MediaPipePoseDetector: jpedr7qv5 +mediapipe_pose_TFLITE-cs_xr_8450_MediaPipePoseLandmarkDetector: jgz3xl6x5 +mediapipe_pose_QNN-cs_8_gen_2_MediaPipePoseDetector: j5wedlkm5 +mediapipe_pose_QNN-cs_8_gen_2_MediaPipePoseLandmarkDetector: jg9l3zr8g +mediapipe_pose_QNN-cs_8_gen_3_MediaPipePoseDetector: jp14dn97p +mediapipe_pose_QNN-cs_8_gen_3_MediaPipePoseLandmarkDetector: jgdxrdkzp +mediapipe_pose_QNN-cs_8_elite_MediaPipePoseDetector: j57yjem95 +mediapipe_pose_QNN-cs_8_elite_MediaPipePoseLandmarkDetector: jp4lxy715 +mediapipe_pose_QNN-cs_8550_MediaPipePoseDetector: jpxk7lql5 +mediapipe_pose_QNN-cs_8550_MediaPipePoseLandmarkDetector: j5mnw079p +mediapipe_pose_QNN-cs_x_elite_MediaPipePoseDetector: jgn69z4q5 
+mediapipe_pose_QNN-cs_x_elite_MediaPipePoseLandmarkDetector: jprv4lr7g +mediapipe_pose_QNN-cs_auto_lemans_8255_MediaPipePoseDetector: jp2k7r1qp +mediapipe_pose_QNN-cs_auto_lemans_8255_MediaPipePoseLandmarkDetector: jpy14ollp +mediapipe_pose_QNN-cs_auto_lemans_8775_MediaPipePoseDetector: jp0z1mwn5 +mediapipe_pose_QNN-cs_auto_lemans_8775_MediaPipePoseLandmarkDetector: jp8q3enop +mediapipe_pose_QNN-cs_auto_lemans_8650_MediaPipePoseDetector: jgkel21ng +mediapipe_pose_QNN-cs_auto_lemans_8650_MediaPipePoseLandmarkDetector: j5q67lnop +mediapipe_pose_QNN-cs_xr_8450_MediaPipePoseDetector: jglv0yjm5 +mediapipe_pose_QNN-cs_xr_8450_MediaPipePoseLandmarkDetector: j56y38kyp +mediapipe_pose_ONNX-cs_8_gen_2_MediaPipePoseDetector: jp3j4zyng +mediapipe_pose_ONNX-cs_8_gen_2_MediaPipePoseLandmarkDetector: jgo21ljkp +mediapipe_pose_ONNX-cs_8_gen_3_MediaPipePoseDetector: jpv61ljr5 +mediapipe_pose_ONNX-cs_8_gen_3_MediaPipePoseLandmarkDetector: jgjv0rjeg +mediapipe_pose_ONNX-cs_8_elite_MediaPipePoseDetector: jpedr7jv5 +mediapipe_pose_ONNX-cs_8_elite_MediaPipePoseLandmarkDetector: jgz3xl1x5 +mediapipe_pose_ONNX-cs_x_elite_MediaPipePoseDetector: j5wedljm5 +mediapipe_pose_ONNX-cs_x_elite_MediaPipePoseLandmarkDetector: jg9l3z68g +mediapipe_pose_ONNX_DML_GPU-cs_x_elite_MediaPipePoseDetector: jp14dnr7p +mediapipe_pose_ONNX_DML_GPU-cs_x_elite_MediaPipePoseLandmarkDetector: jgdxrdjzp +mediapipe_pose_ONNX_DML_NPU-cs_x_elite_MediaPipePoseDetector: j5wedlj45 +mediapipe_pose_ONNX_DML_NPU-cs_x_elite_MediaPipePoseLandmarkDetector: jg9l3z6mg +mediapipe_hand_TFLITE-cs_8_gen_2_MediaPipeHandDetector: jpxk7lw85 +mediapipe_hand_TFLITE-cs_8_gen_2_MediaPipeHandLandmarkDetector: j5mnw0j7p +mediapipe_hand_TFLITE-cs_8_gen_3_MediaPipeHandDetector: jgn69zjj5 +mediapipe_hand_TFLITE-cs_8_gen_3_MediaPipeHandLandmarkDetector: jprv4lzkg +mediapipe_hand_TFLITE-cs_8_elite_MediaPipeHandDetector: jp2k7r26p +mediapipe_hand_TFLITE-cs_8_elite_MediaPipeHandLandmarkDetector: jpy14o90p 
+mediapipe_hand_TFLITE-cs_8550_MediaPipeHandDetector: jp0z1my05 +mediapipe_hand_TFLITE-cs_8550_MediaPipeHandLandmarkDetector: jp8q3eoqp +mediapipe_hand_TFLITE-cs_auto_lemans_8255_MediaPipeHandDetector: jgkel2zvg +mediapipe_hand_TFLITE-cs_auto_lemans_8255_MediaPipeHandLandmarkDetector: j5q67l8ep +mediapipe_hand_TFLITE-cs_auto_lemans_8775_MediaPipeHandDetector: jglv0yn25 +mediapipe_hand_TFLITE-cs_auto_lemans_8775_MediaPipeHandLandmarkDetector: j56y386np +mediapipe_hand_TFLITE-cs_auto_lemans_8650_MediaPipeHandDetector: jp3j4zkmg +mediapipe_hand_TFLITE-cs_auto_lemans_8650_MediaPipeHandLandmarkDetector: jgo21ly1p +mediapipe_hand_TFLITE-cs_auto_makena_8295_MediaPipeHandDetector: jpv61l3z5 +mediapipe_hand_TFLITE-cs_auto_makena_8295_MediaPipeHandLandmarkDetector: jgjv0rx1g +mediapipe_hand_TFLITE-cs_xr_8450_MediaPipeHandDetector: jpedr7985 +mediapipe_hand_TFLITE-cs_xr_8450_MediaPipeHandLandmarkDetector: jgz3xle45 +mediapipe_hand_QNN-cs_8_gen_2_MediaPipeHandDetector: j5wedlo45 +mediapipe_hand_QNN-cs_8_gen_2_MediaPipeHandLandmarkDetector: jg9l3zvmg +mediapipe_hand_QNN-cs_8_gen_3_MediaPipeHandDetector: jp14dn0np +mediapipe_hand_QNN-cs_8_gen_3_MediaPipeHandLandmarkDetector: jgdxrdw6p +mediapipe_hand_QNN-cs_8_elite_MediaPipeHandDetector: j57yjezn5 +mediapipe_hand_QNN-cs_8_elite_MediaPipeHandLandmarkDetector: jp4lxyq25 +mediapipe_hand_QNN-cs_8550_MediaPipeHandDetector: jpxk7lv85 +mediapipe_hand_QNN-cs_8550_MediaPipeHandLandmarkDetector: j5mnw0r7p +mediapipe_hand_QNN-cs_x_elite_MediaPipeHandDetector: jgn69z2j5 +mediapipe_hand_QNN-cs_x_elite_MediaPipeHandLandmarkDetector: jprv4lkkg +mediapipe_hand_QNN-cs_auto_lemans_8255_MediaPipeHandDetector: jp2k7r86p +mediapipe_hand_QNN-cs_auto_lemans_8255_MediaPipeHandLandmarkDetector: jpy14oe0p +mediapipe_hand_QNN-cs_auto_lemans_8775_MediaPipeHandDetector: jp0z1mq05 +mediapipe_hand_QNN-cs_auto_lemans_8775_MediaPipeHandLandmarkDetector: jp8q3e9qp +mediapipe_hand_QNN-cs_auto_lemans_8650_MediaPipeHandDetector: jgkel2nvg 
+mediapipe_hand_QNN-cs_auto_lemans_8650_MediaPipeHandLandmarkDetector: j5q67lkep +mediapipe_hand_QNN-cs_xr_8450_MediaPipeHandDetector: jglv0yz25 +mediapipe_hand_QNN-cs_xr_8450_MediaPipeHandLandmarkDetector: j56y38jnp +mediapipe_hand_ONNX-cs_8_gen_2_MediaPipeHandDetector: jp3j4z3mg +mediapipe_hand_ONNX-cs_8_gen_2_MediaPipeHandLandmarkDetector: jgo21l01p +mediapipe_hand_ONNX-cs_8_gen_3_MediaPipeHandDetector: jpv61loz5 +mediapipe_hand_ONNX-cs_8_gen_3_MediaPipeHandLandmarkDetector: jgjv0rm1g +mediapipe_hand_ONNX-cs_8_elite_MediaPipeHandDetector: jpedr7185 +mediapipe_hand_ONNX-cs_8_elite_MediaPipeHandLandmarkDetector: jgz3xl945 +mediapipe_hand_ONNX-cs_x_elite_MediaPipeHandDetector: j5wedlv45 +mediapipe_hand_ONNX-cs_x_elite_MediaPipeHandLandmarkDetector: jg9l3z1mg +mediapipe_hand_ONNX_DML_GPU-cs_x_elite_MediaPipeHandDetector: jp14dnlnp +mediapipe_hand_ONNX_DML_GPU-cs_x_elite_MediaPipeHandLandmarkDetector: jgdxrd96p +mediapipe_hand_ONNX_DML_NPU-cs_x_elite_MediaPipeHandDetector: j57yjewn5 +mediapipe_hand_ONNX_DML_NPU-cs_x_elite_MediaPipeHandLandmarkDetector: jp4lxyo25 +mediapipe_face_quantized_TFLITE-cs_8_gen_2_MediaPipeFaceDetector: jp2k7r66p +mediapipe_face_quantized_TFLITE-cs_8_gen_2_MediaPipeFaceLandmarkDetector: jpy14ow0p +mediapipe_face_quantized_TFLITE-cs_8_gen_3_MediaPipeFaceDetector: jp0z1m705 +mediapipe_face_quantized_TFLITE-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jp8q3evqp +mediapipe_face_quantized_TFLITE-cs_8_elite_MediaPipeFaceDetector: jgkel2mvg +mediapipe_face_quantized_TFLITE-cs_8_elite_MediaPipeFaceLandmarkDetector: jglv0j885 +mediapipe_face_quantized_TFLITE-cs_6490_MediaPipeFaceDetector: jglv0yr25 +mediapipe_face_quantized_TFLITE-cs_6490_MediaPipeFaceLandmarkDetector: j56y38lnp +mediapipe_face_quantized_TFLITE-cs_8250_MediaPipeFaceDetector: jp3j4z2mg +mediapipe_face_quantized_TFLITE-cs_8250_MediaPipeFaceLandmarkDetector: jgo21lq1p +mediapipe_face_quantized_TFLITE-cs_8550_MediaPipeFaceDetector: jpv61lxz5 
+mediapipe_face_quantized_TFLITE-cs_8550_MediaPipeFaceLandmarkDetector: jgjv0r41g +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8255_MediaPipeFaceDetector: jpedr7385 +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jgz3xlk45 +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8775_MediaPipeFaceDetector: j5wedln45 +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jg9l3zemg +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8650_MediaPipeFaceDetector: jp14dnxnp +mediapipe_face_quantized_TFLITE-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jgdxrdl6p +mediapipe_face_quantized_TFLITE-cs_auto_makena_8295_MediaPipeFaceDetector: j57yje3n5 +mediapipe_face_quantized_TFLITE-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: jp4lxy025 +mediapipe_face_quantized_TFLITE-cs_xr_8450_MediaPipeFaceDetector: jpxk7l285 +mediapipe_face_quantized_TFLITE-cs_xr_8450_MediaPipeFaceLandmarkDetector: j5mnw0y7p +mediapipe_face_quantized_QNN-cs_8_gen_2_MediaPipeFaceDetector: jgn69z8j5 +mediapipe_face_quantized_QNN-cs_8_gen_2_MediaPipeFaceLandmarkDetector: jprv4ljkg +mediapipe_face_quantized_QNN-cs_8_gen_3_MediaPipeFaceDetector: jp2k7rn6p +mediapipe_face_quantized_QNN-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jpy14o00p +mediapipe_face_quantized_QNN-cs_8_elite_MediaPipeFaceDetector: jp0z1o005 +mediapipe_face_quantized_QNN-cs_8_elite_MediaPipeFaceLandmarkDetector: jp8q3jyqp +mediapipe_face_quantized_QNN-cs_6490_MediaPipeFaceDetector: jgkel6xvg +mediapipe_face_quantized_QNN-cs_6490_MediaPipeFaceLandmarkDetector: j5q674qep +mediapipe_face_quantized_QNN-cs_8550_MediaPipeFaceDetector: jglv0wm25 +mediapipe_face_quantized_QNN-cs_8550_MediaPipeFaceLandmarkDetector: j56y3o4np +mediapipe_face_quantized_QNN-cs_x_elite_MediaPipeFaceDetector: jp3j4o0mg +mediapipe_face_quantized_QNN-cs_x_elite_MediaPipeFaceLandmarkDetector: jgo21d61p +mediapipe_face_quantized_QNN-cs_auto_lemans_8255_MediaPipeFaceDetector: jpv612kz5 
+mediapipe_face_quantized_QNN-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jgjv03n1g +mediapipe_face_quantized_QNN-cs_auto_lemans_8775_MediaPipeFaceDetector: jpedr6m85 +mediapipe_face_quantized_QNN-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jgz3xzd45 +mediapipe_face_quantized_QNN-cs_auto_lemans_8650_MediaPipeFaceDetector: j5wedy645 +mediapipe_face_quantized_QNN-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jg9l3onmg +mediapipe_face_quantized_QNN-cs_auto_makena_8295_MediaPipeFaceDetector: jp14doznp +mediapipe_face_quantized_QNN-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: jgdxr616p +mediapipe_face_quantized_QNN-cs_xr_8450_MediaPipeFaceDetector: j5wedy6z5 +mediapipe_face_quantized_QNN-cs_xr_8450_MediaPipeFaceLandmarkDetector: jg9l3onqg +mediapipe_face_quantized_ONNX-cs_8_gen_2_MediaPipeFaceDetector: jp14dozkp +mediapipe_face_quantized_ONNX-cs_8_gen_2_MediaPipeFaceLandmarkDetector: jgdxr61kp +mediapipe_face_quantized_ONNX-cs_8_gen_3_MediaPipeFaceDetector: j57yjorq5 +mediapipe_face_quantized_ONNX-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jp4lxerq5 +mediapipe_face_quantized_ONNX-cs_8_elite_MediaPipeFaceDetector: jpxk70oj5 +mediapipe_face_quantized_ONNX-cs_8_elite_MediaPipeFaceLandmarkDetector: j5mnw9xyp +mediapipe_face_quantized_ONNX-cs_x_elite_MediaPipeFaceDetector: jgn6916v5 +mediapipe_face_quantized_ONNX-cs_x_elite_MediaPipeFaceLandmarkDetector: jprv4xvvg +mediapipe_face_quantized_ONNX_DML_NPU-cs_x_elite_MediaPipeFaceDetector: jp2k7okxp +mediapipe_face_quantized_ONNX_DML_NPU-cs_x_elite_MediaPipeFaceLandmarkDetector: jpy1481rp +mediapipe_face_TFLITE-cs_8_gen_2_MediaPipeFaceDetector: jglv0wve5 +mediapipe_face_TFLITE-cs_8_gen_2_MediaPipeFaceLandmarkDetector: j56y3oyvp +mediapipe_face_TFLITE-cs_8_gen_3_MediaPipeFaceDetector: jp3j4ojxg +mediapipe_face_TFLITE-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jgo21d24p +mediapipe_face_TFLITE-cs_8_elite_MediaPipeFaceDetector: jpv612675 +mediapipe_face_TFLITE-cs_8_elite_MediaPipeFaceLandmarkDetector: 
jgjv03v7g +mediapipe_face_TFLITE-cs_8550_MediaPipeFaceDetector: jpedr6d75 +mediapipe_face_TFLITE-cs_8550_MediaPipeFaceLandmarkDetector: jgz3xz3z5 +mediapipe_face_TFLITE-cs_auto_lemans_8255_MediaPipeFaceDetector: j5wedyez5 +mediapipe_face_TFLITE-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jg9l3olqg +mediapipe_face_TFLITE-cs_auto_lemans_8775_MediaPipeFaceDetector: jp14do4kp +mediapipe_face_TFLITE-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jgdxr6xkp +mediapipe_face_TFLITE-cs_auto_lemans_8650_MediaPipeFaceDetector: j57yjoyq5 +mediapipe_face_TFLITE-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jp4lxelq5 +mediapipe_face_TFLITE-cs_auto_makena_8295_MediaPipeFaceDetector: jpxk70kj5 +mediapipe_face_TFLITE-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: j5mnw9nyp +mediapipe_face_TFLITE-cs_xr_8450_MediaPipeFaceDetector: jgn6910v5 +mediapipe_face_TFLITE-cs_xr_8450_MediaPipeFaceLandmarkDetector: jprv4x6vg +mediapipe_face_QNN-cs_8_gen_2_MediaPipeFaceDetector: jp2k7oxxp +mediapipe_face_QNN-cs_8_gen_2_MediaPipeFaceLandmarkDetector: jpy148zrp +mediapipe_face_QNN-cs_8_gen_3_MediaPipeFaceDetector: jp0z1o425 +mediapipe_face_QNN-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jp8q3j2zp +mediapipe_face_QNN-cs_8_elite_MediaPipeFaceDetector: jgkel6vyg +mediapipe_face_QNN-cs_8_elite_MediaPipeFaceLandmarkDetector: j5q67407p +mediapipe_face_QNN-cs_8550_MediaPipeFaceDetector: jglv0w4e5 +mediapipe_face_QNN-cs_8550_MediaPipeFaceLandmarkDetector: j56y3o2vp +mediapipe_face_QNN-cs_x_elite_MediaPipeFaceDetector: jp3j4onxg +mediapipe_face_QNN-cs_x_elite_MediaPipeFaceLandmarkDetector: jgo21dz4p +mediapipe_face_QNN-cs_auto_lemans_8255_MediaPipeFaceDetector: jpv612q75 +mediapipe_face_QNN-cs_auto_lemans_8255_MediaPipeFaceLandmarkDetector: jgjv03d7g +mediapipe_face_QNN-cs_auto_lemans_8775_MediaPipeFaceDetector: jpedr6o75 +mediapipe_face_QNN-cs_auto_lemans_8775_MediaPipeFaceLandmarkDetector: jgz3xz2z5 +mediapipe_face_QNN-cs_auto_lemans_8650_MediaPipeFaceDetector: j5wedywz5 
+mediapipe_face_QNN-cs_auto_lemans_8650_MediaPipeFaceLandmarkDetector: jg9l3o0qg +mediapipe_face_QNN-cs_auto_makena_8295_MediaPipeFaceDetector: jp14do2kp +mediapipe_face_QNN-cs_auto_makena_8295_MediaPipeFaceLandmarkDetector: jgdxr6nkp +mediapipe_face_QNN-cs_xr_8450_MediaPipeFaceDetector: j57yjo2q5 +mediapipe_face_QNN-cs_xr_8450_MediaPipeFaceLandmarkDetector: jp4lxenq5 +mediapipe_face_ONNX-cs_8_gen_2_MediaPipeFaceDetector: jpxk709j5 +mediapipe_face_ONNX-cs_8_gen_2_MediaPipeFaceLandmarkDetector: j5mnw9eyp +mediapipe_face_ONNX-cs_8_gen_3_MediaPipeFaceDetector: jgn691lv5 +mediapipe_face_ONNX-cs_8_gen_3_MediaPipeFaceLandmarkDetector: jprv4x8vg +mediapipe_face_ONNX-cs_8_elite_MediaPipeFaceDetector: jp2k7o0xp +mediapipe_face_ONNX-cs_8_elite_MediaPipeFaceLandmarkDetector: jpy148rrp +mediapipe_face_ONNX-cs_x_elite_MediaPipeFaceDetector: jp0z1o325 +mediapipe_face_ONNX-cs_x_elite_MediaPipeFaceLandmarkDetector: jp8q3j0zp +mediapipe_face_ONNX_DML_GPU-cs_x_elite_MediaPipeFaceDetector: jgkel67yg +mediapipe_face_ONNX_DML_GPU-cs_x_elite_MediaPipeFaceLandmarkDetector: j5q674e7p +mediapipe_face_ONNX_DML_NPU-cs_x_elite_MediaPipeFaceDetector: jglv0w6e5 +mediapipe_face_ONNX_DML_NPU-cs_x_elite_MediaPipeFaceLandmarkDetector: j56y3oevp +litehrnet_TFLITE-cs_8_gen_2: jp14doykp +litehrnet_TFLITE-cs_8_gen_3: jgdxr6ekp +litehrnet_TFLITE-cs_8_elite: j57yjo0q5 +litehrnet_TFLITE-cs_8550: jp4lxekq5 +litehrnet_TFLITE-cs_auto_lemans_8255: jpxk70nj5 +litehrnet_TFLITE-cs_auto_lemans_8775: j5mnw9qyp +litehrnet_TFLITE-cs_auto_lemans_8650: jgn691mv5 +litehrnet_TFLITE-cs_auto_makena_8295: jprv4x2vg +litehrnet_TFLITE-cs_xr_8450: jp2k7o9xp +litehrnet_QNN-cs_8_gen_2: jpy148jrp +litehrnet_QNN-cs_8_gen_3: jp0z1o225 +litehrnet_QNN-cs_8_elite: jp8q3jmzp +litehrnet_QNN-cs_xr_8450: jgkel6qyg +litehrnet_ONNX-cs_8_gen_2: j5q674r7p +litehrnet_ONNX-cs_8_gen_3: jglv0w2e5 +litehrnet_ONNX-cs_8_elite: j56y3ozvp +litehrnet_ONNX-cs_x_elite: jp3j4o1xg +litehrnet_ONNX_DML_GPU-cs_x_elite: jgo21dn4p 
+litehrnet_ONNX_DML_NPU-cs_x_elite: jpv612r75 +lama_dilated_TFLITE-cs_8_gen_2: jgz3xzjz5 +lama_dilated_TFLITE-cs_8_gen_3: j5wedy3z5 +lama_dilated_TFLITE-cs_8_elite: jg9l3oyqg +lama_dilated_TFLITE-cs_8550: jp14dowkp +lama_dilated_TFLITE-cs_auto_lemans_8255: jgdxr6qkp +lama_dilated_TFLITE-cs_auto_lemans_8775: j5wedy3j5 +lama_dilated_TFLITE-cs_auto_lemans_8650: jg9l3oyvg +lama_dilated_TFLITE-cs_auto_makena_8295: jp14dowlp +lama_dilated_TFLITE-cs_xr_8450: jgdxr6qlp +lama_dilated_QNN-cs_8_gen_2: j57yjoxr5 +lama_dilated_QNN-cs_8_gen_3: jp4lxevl5 +lama_dilated_QNN-cs_8_elite: jpxk70y95 +lama_dilated_QNN-cs_8550: j5mnw93qp +lama_dilated_QNN-cs_x_elite: jgn6913m5 +lama_dilated_QNN-cs_auto_lemans_8255: jprv4xeeg +lama_dilated_QNN-cs_auto_lemans_8775: jp2k7olmp +lama_dilated_QNN-cs_auto_lemans_8650: jpy14864p +lama_dilated_QNN-cs_auto_makena_8295: jp8q3jz8p +lama_dilated_QNN-cs_xr_8450: jgkel63og +lama_dilated_ONNX-cs_8_gen_2: j5q6743mp +lama_dilated_ONNX-cs_8_gen_3: jglv0w3l5 +lama_dilated_ONNX-cs_8_elite: j56y3on7p +lama_dilated_ONNX-cs_x_elite: jp3j4oezg +lama_dilated_ONNX_DML_GPU-cs_x_elite: jgo21d3dp +lama_dilated_ONNX_DML_NPU-cs_x_elite: jpv612vm5 +inception_v3_TFLITE-cs_8_gen_2: jgz3xzr65 +inception_v3_TFLITE-cs_8_gen_3: j5wedyqj5 +inception_v3_TFLITE-cs_8_elite: j5wedjy65 +inception_v3_TFLITE-cs_8550: jp14doelp +inception_v3_TFLITE-cs_auto_lemans_8255: jgdxr6olp +inception_v3_TFLITE-cs_auto_lemans_8775: j57yjodr5 +inception_v3_TFLITE-cs_auto_lemans_8650: jp4lxewl5 +inception_v3_TFLITE-cs_auto_makena_8295: jpxk70195 +inception_v3_TFLITE-cs_xr_8450: j5mnw9zqp +inception_v3_QNN-cs_8_gen_2: jgn691em5 +inception_v3_QNN-cs_8_gen_3: jprv4xyeg +inception_v3_QNN-cs_8_elite: jp2k7ommp +inception_v3_QNN-cs_8550: jpy148d4p +inception_v3_QNN-cs_x_elite: jp0z1ore5 +inception_v3_QNN-cs_auto_lemans_8255: jp8q3j78p +inception_v3_QNN-cs_auto_lemans_8775: jgkel6yog +inception_v3_QNN-cs_auto_lemans_8650: j5q6742mp +inception_v3_QNN-cs_auto_makena_8295: jglv0wkl5 
+inception_v3_QNN-cs_xr_8450: j56y3o17p +inception_v3_ONNX-cs_8_gen_2: jp3j4omzg +inception_v3_ONNX-cs_8_gen_3: jgo21dvdp +inception_v3_ONNX-cs_8_elite: jpv612wm5 +inception_v3_ONNX-cs_x_elite: jgjv03l8g +inception_v3_ONNX_DML_GPU-cs_x_elite: jpedr6v05 +inception_v3_ONNX_DML_NPU-cs_x_elite: jgz3xz765 +huggingface_wavlm_base_plus_TFLITE-cs_8_gen_2: jgdxr6vlp +huggingface_wavlm_base_plus_TFLITE-cs_8_gen_3: j57yjojr5 +huggingface_wavlm_base_plus_TFLITE-cs_8_elite: jp4lxexl5 +huggingface_wavlm_base_plus_TFLITE-cs_8550: jpxk70795 +huggingface_wavlm_base_plus_TFLITE-cs_auto_lemans_8255: j5mnw9wqp +huggingface_wavlm_base_plus_TFLITE-cs_auto_lemans_8775: jgn6919m5 +huggingface_wavlm_base_plus_TFLITE-cs_auto_lemans_8650: jprv4x4eg +huggingface_wavlm_base_plus_TFLITE-cs_auto_makena_8295: jp2k7o7mp +huggingface_wavlm_base_plus_TFLITE-cs_xr_8450: jpy14844p +huggingface_wavlm_base_plus_QNN-cs_8_gen_2: jp0z1o1e5 +huggingface_wavlm_base_plus_QNN-cs_8_gen_3: jp8q3j38p +huggingface_wavlm_base_plus_QNN-cs_8_elite: jgkel6log +huggingface_wavlm_base_plus_QNN-cs_xr_8450: j5q6747mp +huggingface_wavlm_base_plus_ONNX-cs_8_gen_2: jglv0w0l5 +huggingface_wavlm_base_plus_ONNX-cs_8_gen_3: j56y3o37p +huggingface_wavlm_base_plus_ONNX-cs_8_elite: jp3j4o4zg +huggingface_wavlm_base_plus_ONNX-cs_x_elite: jgo21d1dp +huggingface_wavlm_base_plus_ONNX_DML_GPU-cs_x_elite: jpv6121m5 +huggingface_wavlm_base_plus_ONNX_DML_NPU-cs_x_elite: jgjv0308g +hrnet_pose_quantized_TFLITE-cs_8_gen_2: jgz3xzx65 +hrnet_pose_quantized_TFLITE-cs_8_gen_3: j5wedydj5 +hrnet_pose_quantized_TFLITE-cs_8_elite: jg9l3o3vg +hrnet_pose_quantized_TFLITE-cs_6490: jp14dodlp +hrnet_pose_quantized_TFLITE-cs_8250: jgdxr6rlp +hrnet_pose_quantized_TFLITE-cs_8550: j57yjovr5 +hrnet_pose_quantized_TFLITE-cs_auto_lemans_8255: jp4lxejl5 +hrnet_pose_quantized_TFLITE-cs_auto_lemans_8775: jpxk70e95 +hrnet_pose_quantized_TFLITE-cs_auto_lemans_8650: j5mnw9vqp +hrnet_pose_quantized_TFLITE-cs_auto_makena_8295: jgn691rm5 
+hrnet_pose_quantized_TFLITE-cs_xr_8450: jprv4x1eg +hrnet_pose_quantized_QNN-cs_8_gen_2: jp2k7o3mp +hrnet_pose_quantized_QNN-cs_8_gen_3: jpy148v4p +hrnet_pose_quantized_QNN-cs_8_elite: jp0z1oee5 +hrnet_pose_quantized_QNN-cs_6490: jp8q3jw8p +hrnet_pose_quantized_QNN-cs_8550: jgkel6rog +hrnet_pose_quantized_QNN-cs_x_elite: j5q6749mp +hrnet_pose_quantized_QNN-cs_auto_lemans_8255: jglv0wel5 +hrnet_pose_quantized_QNN-cs_auto_lemans_8775: j56y3oq7p +hrnet_pose_quantized_QNN-cs_auto_lemans_8650: jgo21dedp +hrnet_pose_quantized_QNN-cs_auto_makena_8295: jpv612zm5 +hrnet_pose_quantized_QNN-cs_xr_8450: jgjv03k8g +hrnet_pose_quantized_ONNX-cs_8_gen_2: jpedr6405 +hrnet_pose_quantized_ONNX-cs_8_gen_3: jgz3xzv65 +hrnet_pose_quantized_ONNX-cs_8_elite: j5wedymj5 +hrnet_pose_quantized_ONNX-cs_x_elite: jg9l3o9vg +hrnet_pose_quantized_ONNX_DML_NPU-cs_x_elite: jp14doqlp +hrnet_pose_TFLITE-cs_8_gen_2: jg9l3o8lg +hrnet_pose_TFLITE-cs_8_gen_3: jp14do32p +hrnet_pose_TFLITE-cs_8_elite: jgdxr60ep +hrnet_pose_TFLITE-cs_8550: j57yjo6l5 +hrnet_pose_TFLITE-cs_auto_lemans_8255: jp4lxe8v5 +hrnet_pose_TFLITE-cs_auto_lemans_8775: jpxk70m15 +hrnet_pose_TFLITE-cs_auto_lemans_8650: j5mnw94wp +hrnet_pose_TFLITE-cs_auto_makena_8295: jgn691xr5 +hrnet_pose_TFLITE-cs_xr_8450: jprv4x99g +hrnet_pose_QNN-cs_8_gen_2: jp2k7oj4p +hrnet_pose_QNN-cs_8_gen_3: jpy148n7p +hrnet_pose_QNN-cs_8_elite: jp0z1ok65 +hrnet_pose_QNN-cs_8550: jp8q3j8xp +hrnet_pose_QNN-cs_x_elite: jgkel6d2g +hrnet_pose_QNN-cs_auto_lemans_8255: j5q674w4p +hrnet_pose_QNN-cs_auto_lemans_8775: jglv0w785 +hrnet_pose_QNN-cs_auto_lemans_8650: j56y3ov0p +hrnet_pose_QNN-cs_auto_makena_8295: jp3j4o8lg +hrnet_pose_QNN-cs_xr_8450: jgo21dmxp +hrnet_pose_ONNX-cs_8_gen_2: jpv6124j5 +hrnet_pose_ONNX-cs_8_gen_3: jgjv031xg +hrnet_pose_ONNX-cs_8_elite: jpedr6215 +hrnet_pose_ONNX-cs_x_elite: jgz3xzwk5 +hrnet_pose_ONNX_DML_GPU-cs_x_elite: j5wedy865 +hrnet_pose_ONNX_DML_NPU-cs_x_elite: jg9l3oklg +googlenet_TFLITE-cs_8_gen_2: jp4lxemv5 +googlenet_TFLITE-cs_8_gen_3: 
jpxk70315 +googlenet_TFLITE-cs_8_elite: j5mnw9owp +googlenet_TFLITE-cs_8550: jgn691or5 +googlenet_TFLITE-cs_auto_lemans_8255: jprv4xo9g +googlenet_TFLITE-cs_auto_lemans_8775: jp2k7o44p +googlenet_TFLITE-cs_auto_lemans_8650: jpy148q7p +googlenet_TFLITE-cs_auto_makena_8295: jp0z1od65 +googlenet_TFLITE-cs_xr_8450: jp8q3j6xp +googlenet_QNN-cs_8_gen_2: jgkel6o2g +googlenet_QNN-cs_8_gen_3: j5q674z4p +googlenet_QNN-cs_8_elite: jglv0wo85 +googlenet_QNN-cs_8550: j56y3or0p +googlenet_QNN-cs_x_elite: jp3j4oxlg +googlenet_QNN-cs_auto_lemans_8255: jgo21doxp +googlenet_QNN-cs_auto_lemans_8775: jpv612ej5 +googlenet_QNN-cs_auto_lemans_8650: jgjv03oxg +googlenet_QNN-cs_auto_makena_8295: jpedr6815 +googlenet_QNN-cs_xr_8450: jgz3xz8k5 +googlenet_ONNX-cs_8_gen_2: j5wedy165 +googlenet_ONNX-cs_8_gen_3: jg9l3oxlg +googlenet_ONNX-cs_8_elite: jp14dov2p +googlenet_ONNX-cs_x_elite: jgdxr6zep +googlenet_ONNX_DML_GPU-cs_x_elite: j57yjo7l5 +googlenet_ONNX_DML_NPU-cs_x_elite: jp4lxe9v5 +gear_guard_net_quantized_TFLITE-cs_8_gen_2: jprv4xn9g +gear_guard_net_quantized_TFLITE-cs_8_gen_3: jp2k7ov4p +gear_guard_net_quantized_TFLITE-cs_8_elite: jpy14877p +gear_guard_net_quantized_TFLITE-cs_6490: jp0z1ov65 +gear_guard_net_quantized_TFLITE-cs_8250: jp8q3j4xp +gear_guard_net_quantized_TFLITE-cs_8550: jgkel692g +gear_guard_net_quantized_TFLITE-cs_auto_lemans_8255: j5q674m4p +gear_guard_net_quantized_TFLITE-cs_auto_lemans_8775: j56y3od0p +gear_guard_net_quantized_TFLITE-cs_auto_lemans_8650: jp3j4owlg +gear_guard_net_quantized_TFLITE-cs_auto_makena_8295: jgo21d4xp +gear_guard_net_quantized_TFLITE-cs_xr_8450: jpv6129j5 +gear_guard_net_quantized_QNN-cs_8_gen_2: jgjv03wxg +gear_guard_net_quantized_QNN-cs_8_gen_3: jpedr6l15 +gear_guard_net_quantized_QNN-cs_8_elite: jgz3xz4k5 +gear_guard_net_quantized_QNN-cs_6490: j5wedy465 +gear_guard_net_quantized_QNN-cs_8550: jg9l3odlg +gear_guard_net_quantized_QNN-cs_x_elite: jp14do62p +gear_guard_net_quantized_QNN-cs_auto_lemans_8255: jgdxr62ep 
+gear_guard_net_quantized_QNN-cs_auto_lemans_8775: j57yjo9l5 +gear_guard_net_quantized_QNN-cs_auto_lemans_8650: jp4lxe3v5 +gear_guard_net_quantized_QNN-cs_auto_makena_8295: jpxk70x15 +gear_guard_net_quantized_QNN-cs_xr_8450: j5mnw98wp +gear_guard_net_quantized_ONNX-cs_8_gen_2: jgn691kr5 +gear_guard_net_quantized_ONNX-cs_8_gen_3: jprv4xw9g +gear_guard_net_quantized_ONNX-cs_8_elite: jp2k7oe4p +gear_guard_net_quantized_ONNX-cs_x_elite: jpy148m7p +gear_guard_net_quantized_ONNX_DML_NPU-cs_x_elite: jp0z1o665 +gear_guard_net_TFLITE-cs_8_gen_2: jglv0wl85 +gear_guard_net_TFLITE-cs_8_gen_3: j56y3ow0p +gear_guard_net_TFLITE-cs_8_elite: jp3j4o6lg +gear_guard_net_TFLITE-cs_8550: jgo21d8xp +gear_guard_net_TFLITE-cs_auto_lemans_8255: jpv6127j5 +gear_guard_net_TFLITE-cs_auto_lemans_8775: jgjv03qxg +gear_guard_net_TFLITE-cs_auto_lemans_8650: jpedr6y15 +gear_guard_net_TFLITE-cs_auto_makena_8295: jgz3xznk5 +gear_guard_net_TFLITE-cs_xr_8450: j5wedy765 +gear_guard_net_QNN-cs_8_gen_2: jg9l3omlg +gear_guard_net_QNN-cs_8_gen_3: jp14doj2p +gear_guard_net_QNN-cs_8_elite: jgdxr63ep +gear_guard_net_QNN-cs_8550: j5wedy735 +gear_guard_net_QNN-cs_x_elite: jg9l3omwg +gear_guard_net_QNN-cs_auto_lemans_8255: jp14doj8p +gear_guard_net_QNN-cs_auto_lemans_8775: jgdxr63rp +gear_guard_net_QNN-cs_auto_lemans_8650: j57yjo4v5 +gear_guard_net_QNN-cs_auto_makena_8295: jp4lxe185 +gear_guard_net_QNN-cs_xr_8450: jpxk70435 +gear_guard_net_ONNX-cs_8_gen_2: j5mnw9mdp +gear_guard_net_ONNX-cs_8_gen_3: jgn691nk5 +gear_guard_net_ONNX-cs_8_elite: jprv4x00g +gear_guard_net_ONNX-cs_x_elite: jp2k7owrp +gear_guard_net_ONNX_DML_GPU-cs_x_elite: jpy148x8p +gear_guard_net_ONNX_DML_NPU-cs_x_elite: jp0z1oj95 +foot_track_net_quantized_TFLITE-cs_8_gen_2: jglv0wxj5 +foot_track_net_quantized_TFLITE-cs_8_gen_3: j56y3o76p +foot_track_net_quantized_TFLITE-cs_8_elite: jp3j4o93g +foot_track_net_quantized_TFLITE-cs_6490: jgo21drqp +foot_track_net_quantized_TFLITE-cs_8250: jpv612lk5 +foot_track_net_quantized_TFLITE-cs_8550: jgjv03rvg 
+foot_track_net_quantized_TFLITE-cs_auto_lemans_8255: jpedr67o5 +foot_track_net_quantized_TFLITE-cs_auto_lemans_8775: jgz3xzlo5 +foot_track_net_quantized_TFLITE-cs_auto_lemans_8650: j5wedyl35 +foot_track_net_quantized_TFLITE-cs_auto_makena_8295: jg9l3ozwg +foot_track_net_quantized_TFLITE-cs_xr_8450: jp14don8p +foot_track_net_quantized_QNN-cs_8_gen_2: jgdxr6drp +foot_track_net_quantized_QNN-cs_8_gen_3: j57yjoev5 +foot_track_net_quantized_QNN-cs_8_elite: jp4lxey85 +foot_track_net_quantized_QNN-cs_6490: jpxk70l35 +foot_track_net_quantized_QNN-cs_8550: j5mnw90dp +foot_track_net_quantized_QNN-cs_x_elite: jgn691zk5 +foot_track_net_quantized_QNN-cs_auto_lemans_8255: jprv4xl0g +foot_track_net_quantized_QNN-cs_auto_lemans_8775: jp2k7orrp +foot_track_net_quantized_QNN-cs_auto_lemans_8650: jpy148o8p +foot_track_net_quantized_QNN-cs_auto_makena_8295: jp0z1om95 +foot_track_net_quantized_QNN-cs_xr_8450: jp8q3jekp +foot_track_net_quantized_ONNX-cs_8_gen_2: jgkel62wg +foot_track_net_quantized_ONNX-cs_8_gen_3: j5q674lnp +foot_track_net_quantized_ONNX-cs_8_elite: jglv0wyj5 +foot_track_net_quantized_ONNX-cs_x_elite: j56y3o86p +foot_track_net_quantized_ONNX_DML_NPU-cs_x_elite: jp3j4oz3g +foot_track_net_TFLITE-cs_8_gen_2: jpedr66o5 +foot_track_net_TFLITE-cs_8_gen_3: jgz3xzzo5 +foot_track_net_TFLITE-cs_8_elite: j5wedyy35 +foot_track_net_TFLITE-cs_8550: jg9l3oowg +foot_track_net_TFLITE-cs_auto_lemans_8255: jp14doo8p +foot_track_net_TFLITE-cs_auto_lemans_8775: jgdxr66rp +foot_track_net_TFLITE-cs_auto_lemans_8650: j57yjoov5 +foot_track_net_TFLITE-cs_auto_makena_8295: jp4lxee85 +foot_track_net_TFLITE-cs_xr_8450: jpxk70035 +foot_track_net_QNN-cs_8_gen_2: j5mnw99dp +foot_track_net_QNN-cs_8_gen_3: jgn6911k5 +foot_track_net_QNN-cs_8_elite: jprv4xx0g +foot_track_net_QNN-cs_8550: jp2k7oorp +foot_track_net_QNN-cs_x_elite: jpy14888p +foot_track_net_QNN-cs_auto_lemans_8255: jp0z1oo95 +foot_track_net_QNN-cs_auto_lemans_8775: jp8q3jjkp +foot_track_net_QNN-cs_auto_lemans_8650: jgkel66wg 
+foot_track_net_QNN-cs_auto_makena_8295: j5q6744np +foot_track_net_QNN-cs_xr_8450: jglv0wwj5 +foot_track_net_ONNX-cs_8_gen_2: j56y3oo6p +foot_track_net_ONNX-cs_8_gen_3: jp3j4oo3g +foot_track_net_ONNX-cs_8_elite: jgo21ddqp +foot_track_net_ONNX-cs_x_elite: jpv612mk5 +foot_track_net_ONNX_DML_GPU-cs_x_elite: jgjv03yvg +foot_track_net_ONNX_DML_NPU-cs_x_elite: jpedr6xo5 +ffnet_78s_quantized_TFLITE-cs_8_gen_2: jp14do18p +ffnet_78s_quantized_TFLITE-cs_8_gen_3: jgdxr64rp +ffnet_78s_quantized_TFLITE-cs_8_elite: j57yjonv5 +ffnet_78s_quantized_TFLITE-cs_6490: jp4lxe485 +ffnet_78s_quantized_TFLITE-cs_8250: jpxk70r35 +ffnet_78s_quantized_TFLITE-cs_8550: j5mnw9kdp +ffnet_78s_quantized_TFLITE-cs_auto_lemans_8255: jgn691qk5 +ffnet_78s_quantized_TFLITE-cs_auto_lemans_8775: jprv4xd0g +ffnet_78s_quantized_TFLITE-cs_auto_lemans_8650: jp2k7odrp +ffnet_78s_quantized_TFLITE-cs_auto_makena_8295: jpy14828p +ffnet_78s_quantized_TFLITE-cs_xr_8450: jp0z1o995 +ffnet_78s_quantized_QNN-cs_8_gen_2: jp8q3jrkp +ffnet_78s_quantized_QNN-cs_8_gen_3: jgkel60wg +ffnet_78s_quantized_QNN-cs_8_elite: j5q6741np +ffnet_78s_quantized_QNN-cs_6490: jglv0w8j5 +ffnet_78s_quantized_QNN-cs_8550: j56y3om6p +ffnet_78s_quantized_QNN-cs_x_elite: jp3j4o73g +ffnet_78s_quantized_QNN-cs_auto_lemans_8255: jgo21dwqp +ffnet_78s_quantized_QNN-cs_auto_lemans_8775: jpv612nk5 +ffnet_78s_quantized_QNN-cs_auto_lemans_8650: jgjv038vg +ffnet_78s_quantized_QNN-cs_auto_makena_8295: jpedr6no5 +ffnet_78s_quantized_QNN-cs_xr_8450: jgz3xz0o5 +ffnet_78s_quantized_ONNX-cs_8_gen_2: j5wedyr35 +ffnet_78s_quantized_ONNX-cs_8_gen_3: jg9l3oqwg +ffnet_78s_quantized_ONNX-cs_8_elite: jp14dom8p +ffnet_78s_quantized_ONNX-cs_x_elite: jgdxr6mrp +ffnet_78s_quantized_ONNX_DML_NPU-cs_x_elite: j5wedyrm5 +ffnet_78s_lowres_TFLITE-cs_8_gen_2: jgdxr6mzp +ffnet_78s_lowres_TFLITE-cs_8_gen_3: j57yjo895 +ffnet_78s_lowres_TFLITE-cs_8_elite: jp4lxe215 +ffnet_78s_lowres_TFLITE-cs_8550: jpxk70zl5 +ffnet_78s_lowres_TFLITE-cs_auto_lemans_8255: j5mnw9l9p 
+ffnet_78s_lowres_TFLITE-cs_auto_lemans_8775: jgn691wq5 +ffnet_78s_lowres_TFLITE-cs_auto_lemans_8650: jprv4x77g +ffnet_78s_lowres_TFLITE-cs_auto_makena_8295: jp2k7ozqp +ffnet_78s_lowres_TFLITE-cs_xr_8450: jpy148ylp +ffnet_78s_lowres_QNN-cs_8_gen_2: jp0z1oxn5 +ffnet_78s_lowres_QNN-cs_8_gen_3: jp8q3jkop +ffnet_78s_lowres_QNN-cs_8_elite: jgkel6kng +ffnet_78s_lowres_QNN-cs_8550: j5q674dop +ffnet_78s_lowres_QNN-cs_x_elite: jglv0w9m5 +ffnet_78s_lowres_QNN-cs_auto_lemans_8255: j56y3o9yp +ffnet_78s_lowres_QNN-cs_auto_lemans_8775: jp3j4olng +ffnet_78s_lowres_QNN-cs_auto_lemans_8650: jgo21d7kp +ffnet_78s_lowres_QNN-cs_auto_makena_8295: jgz3x1lk5 +ffnet_78s_lowres_QNN-cs_xr_8450: jgjv036eg +ffnet_78s_lowres_ONNX-cs_8_gen_2: jpedr60v5 +ffnet_78s_lowres_ONNX-cs_8_gen_3: jgz3xzqx5 +ffnet_78s_lowres_ONNX-cs_8_elite: j5wedy0m5 +ffnet_78s_lowres_ONNX-cs_x_elite: jg9l3o78g +ffnet_78s_lowres_ONNX_DML_GPU-cs_x_elite: jp14dok7p +ffnet_78s_lowres_ONNX_DML_NPU-cs_x_elite: jgdxr6yzp +ffnet_78s_TFLITE-cs_8_gen_2: j5mnw919p +ffnet_78s_TFLITE-cs_8_gen_3: jgn691dq5 +ffnet_78s_TFLITE-cs_8_elite: jprv4xm7g +ffnet_78s_TFLITE-cs_8550: jp2k7oqqp +ffnet_78s_TFLITE-cs_auto_lemans_8255: jpy148klp +ffnet_78s_TFLITE-cs_auto_lemans_8775: jp0z1o8n5 +ffnet_78s_TFLITE-cs_auto_lemans_8650: jp8q3jdop +ffnet_78s_TFLITE-cs_auto_makena_8295: jgkel6wng +ffnet_78s_TFLITE-cs_xr_8450: j5q674xop +ffnet_78s_QNN-cs_8_gen_2: jglv0wdm5 +ffnet_78s_QNN-cs_8_gen_3: j56y3oxyp +ffnet_78s_QNN-cs_8_elite: jp3j4odng +ffnet_78s_QNN-cs_8550: jpv6128r5 +ffnet_78s_QNN-cs_x_elite: jgjv039eg +ffnet_78s_QNN-cs_auto_lemans_8255: jpedr6qv5 +ffnet_78s_QNN-cs_auto_lemans_8775: jgz3xz6x5 +ffnet_78s_QNN-cs_auto_lemans_8650: j5wedykm5 +ffnet_78s_QNN-cs_auto_makena_8295: jg9l3or8g +ffnet_78s_QNN-cs_xr_8450: jp14do97p +ffnet_78s_ONNX-cs_8_gen_2: jgdxr6kzp +ffnet_78s_ONNX-cs_8_gen_3: j57yjom95 +ffnet_78s_ONNX-cs_8_elite: jp4lxe715 +ffnet_78s_ONNX-cs_x_elite: jpxk70ql5 +ffnet_78s_ONNX_DML_GPU-cs_x_elite: j5mnw979p 
+ffnet_78s_ONNX_DML_NPU-cs_x_elite: jgn6914q5 +ffnet_54s_quantized_TFLITE-cs_8_gen_2: jp0z1own5 +ffnet_54s_quantized_TFLITE-cs_8_gen_3: jp8q3jnop +ffnet_54s_quantized_TFLITE-cs_8_elite: jgkel61ng +ffnet_54s_quantized_TFLITE-cs_6490: j5q674nop +ffnet_54s_quantized_TFLITE-cs_8250: jglv0wjm5 +ffnet_54s_quantized_TFLITE-cs_8550: j56y3okyp +ffnet_54s_quantized_TFLITE-cs_auto_lemans_8255: jp3j4oyng +ffnet_54s_quantized_TFLITE-cs_auto_lemans_8775: jgo21djkp +ffnet_54s_quantized_TFLITE-cs_auto_lemans_8650: jpv612jr5 +ffnet_54s_quantized_TFLITE-cs_auto_makena_8295: jgjv03jeg +ffnet_54s_quantized_TFLITE-cs_xr_8450: jpedr6jv5 +ffnet_54s_quantized_QNN-cs_8_gen_2: jgz3xz1x5 +ffnet_54s_quantized_QNN-cs_8_gen_3: j5wedyjm5 +ffnet_54s_quantized_QNN-cs_8_elite: jg9l3o68g +ffnet_54s_quantized_QNN-cs_6490: jp14dor7p +ffnet_54s_quantized_QNN-cs_8550: jgdxr6jzp +ffnet_54s_quantized_QNN-cs_x_elite: j57yjoq95 +ffnet_54s_quantized_QNN-cs_auto_lemans_8255: jp4lxez15 +ffnet_54s_quantized_QNN-cs_auto_lemans_8775: jpxk70wl5 +ffnet_54s_quantized_QNN-cs_auto_lemans_8650: j5mnw9j9p +ffnet_54s_quantized_QNN-cs_auto_makena_8295: jgn691jq5 +ffnet_54s_quantized_QNN-cs_xr_8450: jprv4xz7g +ffnet_54s_quantized_ONNX-cs_8_gen_2: jp2k7o2qp +ffnet_54s_quantized_ONNX-cs_8_gen_3: jpy1489lp +ffnet_54s_quantized_ONNX-cs_8_elite: jp0z1onn5 +ffnet_54s_quantized_ONNX-cs_x_elite: jp8q3jlop +ffnet_54s_quantized_ONNX_DML_NPU-cs_x_elite: jgkel6jng +ffnet_54s_TFLITE-cs_8_gen_2: j56y3o6yp +ffnet_54s_TFLITE-cs_8_gen_3: jp3j4okng +ffnet_54s_TFLITE-cs_8_elite: jgo21dykp +ffnet_54s_TFLITE-cs_8550: jpv6123r5 +ffnet_54s_TFLITE-cs_auto_lemans_8255: jgjv03xeg +ffnet_54s_TFLITE-cs_auto_lemans_8775: jpedr69v5 +ffnet_54s_TFLITE-cs_auto_lemans_8650: jgz3xzex5 +ffnet_54s_TFLITE-cs_auto_makena_8295: j5wedyom5 +ffnet_54s_TFLITE-cs_xr_8450: jg9l3ov8g +ffnet_54s_QNN-cs_8_gen_2: jp14do07p +ffnet_54s_QNN-cs_8_gen_3: jgdxr6wzp +ffnet_54s_QNN-cs_8_elite: j5wedyo45 +ffnet_54s_QNN-cs_8550: jg9l3ovmg +ffnet_54s_QNN-cs_x_elite: jp14do0np 
+ffnet_54s_QNN-cs_auto_lemans_8255: jgdxr6w6p +ffnet_54s_QNN-cs_auto_lemans_8775: j57yjozn5 +ffnet_54s_QNN-cs_auto_lemans_8650: jp4lxeq25 +ffnet_54s_QNN-cs_auto_makena_8295: jpxk70v85 +ffnet_54s_QNN-cs_xr_8450: j5mnw9r7p +ffnet_54s_ONNX-cs_8_gen_2: jgn6912j5 +ffnet_54s_ONNX-cs_8_gen_3: jprv4xkkg +ffnet_54s_ONNX-cs_8_elite: jp2k7o86p +ffnet_54s_ONNX-cs_x_elite: jpy148e0p +ffnet_54s_ONNX_DML_GPU-cs_x_elite: jp0z1oq05 +ffnet_54s_ONNX_DML_NPU-cs_x_elite: jp8q3j9qp +ffnet_40s_quantized_TFLITE-cs_8_gen_2: j56y3ojnp +ffnet_40s_quantized_TFLITE-cs_8_gen_3: jp3j4o3mg +ffnet_40s_quantized_TFLITE-cs_8_elite: jgo21d01p +ffnet_40s_quantized_TFLITE-cs_6490: jpv612oz5 +ffnet_40s_quantized_TFLITE-cs_8250: jgjv03m1g +ffnet_40s_quantized_TFLITE-cs_8550: jpedr6185 +ffnet_40s_quantized_TFLITE-cs_auto_lemans_8255: jgz3xz945 +ffnet_40s_quantized_TFLITE-cs_auto_lemans_8775: j5wedyv45 +ffnet_40s_quantized_TFLITE-cs_auto_lemans_8650: jg9l3o1mg +ffnet_40s_quantized_TFLITE-cs_auto_makena_8295: jp14dolnp +ffnet_40s_quantized_TFLITE-cs_xr_8450: jgdxr696p +ffnet_40s_quantized_QNN-cs_8_gen_2: j57yjown5 +ffnet_40s_quantized_QNN-cs_8_gen_3: jp4lxeo25 +ffnet_40s_quantized_QNN-cs_8_elite: jpxk70j85 +ffnet_40s_quantized_QNN-cs_6490: j5mnw927p +ffnet_40s_quantized_QNN-cs_8550: jgn691yj5 +ffnet_40s_quantized_QNN-cs_x_elite: jprv4xqkg +ffnet_40s_quantized_QNN-cs_auto_lemans_8255: jp2k7o66p +ffnet_40s_quantized_QNN-cs_auto_lemans_8775: jpy148w0p +ffnet_40s_quantized_QNN-cs_auto_lemans_8650: jp0z1o705 +ffnet_40s_quantized_QNN-cs_auto_makena_8295: jp8q3jvqp +ffnet_40s_quantized_QNN-cs_xr_8450: jgkel6mvg +ffnet_40s_quantized_ONNX-cs_8_gen_2: j5q674oep +ffnet_40s_quantized_ONNX-cs_8_gen_3: jglv0wr25 +ffnet_40s_quantized_ONNX-cs_8_elite: j56y3olnp +ffnet_40s_quantized_ONNX-cs_x_elite: jp3j4o2mg +ffnet_40s_quantized_ONNX_DML_NPU-cs_x_elite: jgo21dq1p +ffnet_40s_TFLITE-cs_8_gen_2: jpedr6385 +ffnet_40s_TFLITE-cs_8_gen_3: jgz3xzk45 +ffnet_40s_TFLITE-cs_8_elite: j5wedyn45 +ffnet_40s_TFLITE-cs_8550: jg9l3oemg 
+ffnet_40s_TFLITE-cs_auto_lemans_8255: jp14doxnp +ffnet_40s_TFLITE-cs_auto_lemans_8775: jgdxr6l6p +ffnet_40s_TFLITE-cs_auto_lemans_8650: j57yjo3n5 +ffnet_40s_TFLITE-cs_auto_makena_8295: jp4lxe025 +ffnet_40s_TFLITE-cs_xr_8450: jpxk70285 +ffnet_40s_QNN-cs_8_gen_2: j5mnw9y7p +ffnet_40s_QNN-cs_8_gen_3: jgn6918j5 +ffnet_40s_QNN-cs_8_elite: jprv4xjkg +ffnet_40s_QNN-cs_8550: jp2k7on6p +ffnet_40s_QNN-cs_x_elite: jpy14800p +ffnet_40s_QNN-cs_auto_lemans_8255: jp0z19005 +ffnet_40s_QNN-cs_auto_lemans_8775: jp8q3ryqp +ffnet_40s_QNN-cs_auto_lemans_8650: jgkel0xvg +ffnet_40s_QNN-cs_auto_makena_8295: j5q671qep +ffnet_40s_QNN-cs_xr_8450: jglv08m25 +ffnet_40s_ONNX-cs_8_gen_2: j56y3m4np +ffnet_40s_ONNX-cs_8_gen_3: jp3j470mg +ffnet_40s_ONNX-cs_8_elite: jgo21w61p +ffnet_40s_ONNX-cs_x_elite: jpv61mkz5 +ffnet_40s_ONNX_DML_GPU-cs_x_elite: jgjv0yn1g +ffnet_40s_ONNX_DML_NPU-cs_x_elite: jpedrxm85 +ffnet_122ns_lowres_TFLITE-cs_8_gen_2: jp14d1znp +ffnet_122ns_lowres_TFLITE-cs_8_gen_3: jgdxr416p +ffnet_122ns_lowres_TFLITE-cs_8_elite: j57yjnrn5 +ffnet_122ns_lowres_TFLITE-cs_8550: jp4lx4r25 +ffnet_122ns_lowres_TFLITE-cs_auto_lemans_8255: jpxk7ro85 +ffnet_122ns_lowres_TFLITE-cs_auto_lemans_8775: j5mnwkx7p +ffnet_122ns_lowres_TFLITE-cs_auto_lemans_8650: jgn69qvj5 +ffnet_122ns_lowres_TFLITE-cs_auto_makena_8295: jprv4d3kg +ffnet_122ns_lowres_TFLITE-cs_xr_8450: jp2k7dy6p +ffnet_122ns_lowres_QNN-cs_8_gen_2: jpy14230p +ffnet_122ns_lowres_QNN-cs_8_gen_3: jp0z19z05 +ffnet_122ns_lowres_QNN-cs_8_elite: jp8q3rqqp +ffnet_122ns_lowres_QNN-cs_8550: jgkel0evg +ffnet_122ns_lowres_QNN-cs_x_elite: j5q6716ep +ffnet_122ns_lowres_QNN-cs_auto_lemans_8255: jglv08v25 +ffnet_122ns_lowres_QNN-cs_auto_lemans_8775: j56y3mynp +ffnet_122ns_lowres_QNN-cs_auto_lemans_8650: jp3j47jmg +ffnet_122ns_lowres_QNN-cs_auto_makena_8295: jgo21w21p +ffnet_122ns_lowres_QNN-cs_xr_8450: jpv61m6z5 +ffnet_122ns_lowres_ONNX-cs_8_gen_2: jgjv0yv1g +ffnet_122ns_lowres_ONNX-cs_8_gen_3: jpedrxd85 +ffnet_122ns_lowres_ONNX-cs_8_elite: jgz3xy345 
+ffnet_122ns_lowres_ONNX-cs_x_elite: j5wedze45 +ffnet_122ns_lowres_ONNX_DML_GPU-cs_x_elite: jg9l32lmg +ffnet_122ns_lowres_ONNX_DML_NPU-cs_x_elite: jp14d14np +fcn_resnet50_TFLITE-cs_8_gen_2: jp14d14kp +fcn_resnet50_TFLITE-cs_8_gen_3: jgdxr4xkp +fcn_resnet50_TFLITE-cs_8_elite: j57yjnyq5 +fcn_resnet50_TFLITE-cs_8550: jp4lx4lq5 +fcn_resnet50_TFLITE-cs_auto_lemans_8255: jpxk7rkj5 +fcn_resnet50_TFLITE-cs_auto_lemans_8775: j5mnwknyp +fcn_resnet50_TFLITE-cs_auto_lemans_8650: jgn69q0v5 +fcn_resnet50_TFLITE-cs_auto_makena_8295: jprv4d6vg +fcn_resnet50_TFLITE-cs_xr_8450: jp2k7dxxp +fcn_resnet50_QNN-cs_8_gen_2: jpy142zrp +fcn_resnet50_QNN-cs_8_gen_3: jp0z19425 +fcn_resnet50_QNN-cs_8_elite: jp8q3r2zp +fcn_resnet50_QNN-cs_8550: jgkel0vyg +fcn_resnet50_QNN-cs_x_elite: j5q67107p +fcn_resnet50_QNN-cs_auto_lemans_8255: jglv084e5 +fcn_resnet50_QNN-cs_auto_lemans_8775: j56y3m2vp +fcn_resnet50_QNN-cs_auto_lemans_8650: jp3j47nxg +fcn_resnet50_QNN-cs_auto_makena_8295: jgo21wz4p +fcn_resnet50_QNN-cs_xr_8450: jpv61mq75 +fcn_resnet50_ONNX-cs_8_gen_2: jgjv0yd7g +fcn_resnet50_ONNX-cs_8_gen_3: jpedrxo75 +fcn_resnet50_ONNX-cs_8_elite: jgz3xy2z5 +fcn_resnet50_ONNX-cs_x_elite: j5wedzwz5 +fcn_resnet50_ONNX_DML_GPU-cs_x_elite: jg9l320qg +fcn_resnet50_ONNX_DML_NPU-cs_x_elite: jp14d12kp +fastsam_x_TFLITE-cs_8_gen_2: jpxk7r9j5 +fastsam_x_TFLITE-cs_8_gen_3: j5mnwkeyp +fastsam_x_TFLITE-cs_8_elite: jgn69qlv5 +fastsam_x_TFLITE-cs_8550: jprv4d8vg +fastsam_x_TFLITE-cs_auto_lemans_8255: jp2k7d0xp +fastsam_x_TFLITE-cs_auto_lemans_8775: jpy142rrp +fastsam_x_TFLITE-cs_auto_lemans_8650: jp0z19325 +fastsam_x_TFLITE-cs_auto_makena_8295: jp8q3r0zp +fastsam_x_TFLITE-cs_xr_8450: jgkel07yg +fastsam_x_QNN-cs_8_gen_2: j5q671e7p +fastsam_x_QNN-cs_8_gen_3: jglv086e5 +fastsam_x_QNN-cs_8_elite: j56y3mevp +fastsam_x_QNN-cs_8550: jp3j47vxg +fastsam_x_QNN-cs_x_elite: jgo21wk4p +fastsam_x_QNN-cs_auto_lemans_8255: jpv61m075 +fastsam_x_QNN-cs_auto_lemans_8775: jgjv0yz7g +fastsam_x_QNN-cs_auto_lemans_8650: jpedrxe75 
+fastsam_x_QNN-cs_auto_makena_8295: jgz3xyoz5 +fastsam_x_QNN-cs_xr_8450: j5wedz2z5 +fastsam_x_ONNX-cs_8_gen_2: jg9l32jqg +fastsam_x_ONNX-cs_8_gen_3: jp14d1ykp +fastsam_x_ONNX-cs_8_elite: jgdxr4ekp +fastsam_x_ONNX-cs_x_elite: j57yjn0q5 +fastsam_x_ONNX_DML_GPU-cs_x_elite: jp4lx4kq5 +fastsam_x_ONNX_DML_NPU-cs_x_elite: jpxk7rnj5 +fastsam_s_TFLITE-cs_8_gen_2: jprv4d2vg +fastsam_s_TFLITE-cs_8_gen_3: jp2k7d9xp +fastsam_s_TFLITE-cs_8_elite: jpy142jrp +fastsam_s_TFLITE-cs_8550: jp0z19225 +fastsam_s_TFLITE-cs_auto_lemans_8255: jp8q3rmzp +fastsam_s_TFLITE-cs_auto_lemans_8775: jgkel0qyg +fastsam_s_TFLITE-cs_auto_lemans_8650: j5q671r7p +fastsam_s_TFLITE-cs_auto_makena_8295: jglv082e5 +fastsam_s_TFLITE-cs_xr_8450: j56y3mzvp +fastsam_s_QNN-cs_8_gen_2: jp3j471xg +fastsam_s_QNN-cs_8_gen_3: jgo21wn4p +fastsam_s_QNN-cs_8_elite: jpv61mr75 +fastsam_s_QNN-cs_8550: jgjv0y27g +fastsam_s_QNN-cs_x_elite: jpedrxw75 +fastsam_s_QNN-cs_auto_lemans_8255: jgz3xyjz5 +fastsam_s_QNN-cs_auto_lemans_8775: j5wedz3z5 +fastsam_s_QNN-cs_auto_lemans_8650: jg9l32yqg +fastsam_s_QNN-cs_auto_makena_8295: jp14d1wkp +fastsam_s_QNN-cs_xr_8450: jgdxr4qkp +fastsam_s_ONNX-cs_8_gen_2: j57yjnlq5 +fastsam_s_ONNX-cs_8_gen_3: jp4lx4dq5 +fastsam_s_ONNX-cs_8_elite: jpxk7r6j5 +fastsam_s_ONNX-cs_x_elite: j5mnwk6yp +fastsam_s_ONNX_DML_GPU-cs_x_elite: jgn69q3v5 +fastsam_s_ONNX_DML_NPU-cs_x_elite: jprv4devg +facemap_3dmm_TFLITE-cs_8_gen_2: jp0z19l25 +facemap_3dmm_TFLITE-cs_8_gen_3: jp8q3rzzp +facemap_3dmm_TFLITE-cs_8_elite: jgkel03yg +facemap_3dmm_TFLITE-cs_8550: j5q67137p +facemap_3dmm_TFLITE-cs_auto_lemans_8255: jglv083e5 +facemap_3dmm_TFLITE-cs_auto_lemans_8775: j56y3mnvp +facemap_3dmm_TFLITE-cs_auto_lemans_8650: jp3j47exg +facemap_3dmm_TFLITE-cs_auto_makena_8295: jgo21w34p +facemap_3dmm_TFLITE-cs_xr_8450: jpv61mv75 +facemap_3dmm_QNN-cs_8_gen_2: jgjv0ye7g +facemap_3dmm_QNN-cs_8_gen_3: jpedrxk75 +facemap_3dmm_QNN-cs_8_elite: jgz3xyrz5 +facemap_3dmm_QNN-cs_8550: j5wedzqz5 +facemap_3dmm_QNN-cs_x_elite: jg9l32wqg 
+facemap_3dmm_QNN-cs_auto_lemans_8255: jp14d1ekp +facemap_3dmm_QNN-cs_auto_lemans_8775: jgdxr4okp +facemap_3dmm_QNN-cs_auto_lemans_8650: j5wedzqj5 +facemap_3dmm_QNN-cs_auto_makena_8295: jg9l32wvg +facemap_3dmm_QNN-cs_xr_8450: jp14d1elp +facemap_3dmm_ONNX-cs_8_gen_2: jgdxr4olp +facemap_3dmm_ONNX-cs_8_gen_3: j57yjndr5 +facemap_3dmm_ONNX-cs_8_elite: jp4lx4wl5 +facemap_3dmm_ONNX-cs_x_elite: jpxk7r195 +facemap_3dmm_ONNX_DML_GPU-cs_x_elite: j5mnwkzqp +facemap_3dmm_ONNX_DML_NPU-cs_x_elite: jgn69qem5 +face_det_lite_TFLITE-cs_8_gen_2: jp8q3r78p +face_det_lite_TFLITE-cs_8_gen_3: jgkel0yog +face_det_lite_TFLITE-cs_8_elite: j5q6712mp +face_det_lite_TFLITE-cs_8550: jglv08kl5 +face_det_lite_TFLITE-cs_auto_lemans_8255: j56y3m17p +face_det_lite_TFLITE-cs_auto_lemans_8775: jp3j47mzg +face_det_lite_TFLITE-cs_auto_lemans_8650: jgo21wvdp +face_det_lite_TFLITE-cs_auto_makena_8295: jpv61mwm5 +face_det_lite_TFLITE-cs_xr_8450: jgjv0yl8g +face_det_lite_QNN-cs_8_gen_2: jpedrxv05 +face_det_lite_QNN-cs_8_gen_3: jgz3xy765 +face_det_lite_QNN-cs_8_elite: j5wedz9j5 +face_det_lite_QNN-cs_8550: jg9l324vg +face_det_lite_QNN-cs_x_elite: jp14d18lp +face_det_lite_QNN-cs_auto_lemans_8255: jgdxr4vlp +face_det_lite_QNN-cs_auto_lemans_8775: j57yjnjr5 +face_det_lite_QNN-cs_auto_lemans_8650: jp4lx4xl5 +face_det_lite_QNN-cs_auto_makena_8295: jpxk7r795 +face_det_lite_QNN-cs_xr_8450: j5mnwkwqp +face_det_lite_ONNX-cs_8_gen_2: jgn69q9m5 +face_det_lite_ONNX-cs_8_gen_3: jprv4d4eg +face_det_lite_ONNX-cs_8_elite: jp2k7d7mp +face_det_lite_ONNX-cs_x_elite: jpy14244p +face_det_lite_ONNX_DML_GPU-cs_x_elite: jp0z191e5 +face_det_lite_ONNX_DML_NPU-cs_x_elite: jp8q3r38p +face_body_net_TFLITE-cs_8_gen_2: j56y3m37p +face_body_net_TFLITE-cs_8_gen_3: jp3j474zg +face_body_net_TFLITE-cs_8_elite: jgo21w1dp +face_body_net_TFLITE-cs_8550: jpv61m1m5 +face_body_net_TFLITE-cs_auto_lemans_8255: jgjv0y08g +face_body_net_TFLITE-cs_auto_lemans_8775: jpedrxr05 +face_body_net_TFLITE-cs_auto_lemans_8650: jgz3xyx65 
+face_body_net_TFLITE-cs_auto_makena_8295: j5wedzdj5 +face_body_net_TFLITE-cs_xr_8450: jg9l323vg +face_body_net_QNN-cs_8_gen_2: jp14d1dlp +face_body_net_QNN-cs_8_gen_3: jgdxr4rlp +face_body_net_QNN-cs_8_elite: j57yjnvr5 +face_body_net_QNN-cs_8550: jp4lx4jl5 +face_body_net_QNN-cs_x_elite: jpxk7re95 +face_body_net_QNN-cs_auto_lemans_8255: j5mnwkvqp +face_body_net_QNN-cs_auto_lemans_8775: jgn69qrm5 +face_body_net_QNN-cs_auto_lemans_8650: jprv4d1eg +face_body_net_QNN-cs_auto_makena_8295: jp2k7d3mp +face_body_net_QNN-cs_xr_8450: jpy142v4p +face_body_net_ONNX-cs_8_gen_2: jp0z19ee5 +face_body_net_ONNX-cs_8_gen_3: jp8q3rw8p +face_body_net_ONNX-cs_8_elite: jgkel0rog +face_body_net_ONNX-cs_x_elite: j5q6719mp +face_body_net_ONNX_DML_GPU-cs_x_elite: jglv08el5 +face_body_net_ONNX_DML_NPU-cs_x_elite: j56y3mq7p +face_attrib_net_TFLITE-cs_8_gen_2: jgjv0yk8g +face_attrib_net_TFLITE-cs_8_gen_3: jpedrx405 +face_attrib_net_TFLITE-cs_8_elite: jgz3xyv65 +face_attrib_net_TFLITE-cs_8550: j5wedzmj5 +face_attrib_net_TFLITE-cs_auto_lemans_8255: jg9l329vg +face_attrib_net_TFLITE-cs_auto_lemans_8775: jp14d1qlp +face_attrib_net_TFLITE-cs_auto_lemans_8650: jgdxr47lp +face_attrib_net_TFLITE-cs_auto_makena_8295: j57yjn6r5 +face_attrib_net_TFLITE-cs_xr_8450: jp4lx48l5 +face_attrib_net_QNN-cs_8_gen_2: jpxk7rm95 +face_attrib_net_QNN-cs_8_gen_3: j5mnwk4qp +face_attrib_net_QNN-cs_8_elite: jgn69qxm5 +face_attrib_net_QNN-cs_8550: jprv4d9eg +face_attrib_net_QNN-cs_x_elite: jp2k7djmp +face_attrib_net_QNN-cs_auto_lemans_8255: jpy142n4p +face_attrib_net_QNN-cs_auto_lemans_8775: jp0z19ke5 +face_attrib_net_QNN-cs_auto_lemans_8650: jp8q3r88p +face_attrib_net_QNN-cs_auto_makena_8295: jgkel0dog +face_attrib_net_QNN-cs_xr_8450: j5q671wmp +face_attrib_net_ONNX-cs_8_gen_2: jglv087l5 +face_attrib_net_ONNX-cs_8_gen_3: j56y3mv7p +face_attrib_net_ONNX-cs_8_elite: jp3j478zg +face_attrib_net_ONNX-cs_x_elite: jgo21wmdp +face_attrib_net_ONNX_DML_GPU-cs_x_elite: jpv61m4m5 +face_attrib_net_ONNX_DML_NPU-cs_x_elite: jgjv0y18g 
+esrgan_TFLITE-cs_8_gen_2: jg9l328vg +esrgan_TFLITE-cs_8_gen_3: jp14d13lp +esrgan_TFLITE-cs_8_elite: jgdxr40lp +esrgan_TFLITE-cs_8550: jg9l32klg +esrgan_TFLITE-cs_auto_lemans_8255: jp14d172p +esrgan_TFLITE-cs_auto_lemans_8775: jgdxr48ep +esrgan_TFLITE-cs_auto_lemans_8650: j57yjnkl5 +esrgan_TFLITE-cs_auto_makena_8295: jp4lx4mv5 +esrgan_TFLITE-cs_xr_8450: jpxk7r315 +esrgan_QNN-cs_8_gen_2: j5mnwkowp +esrgan_QNN-cs_8_gen_3: jgn69qor5 +esrgan_QNN-cs_8_elite: jprv4do9g +esrgan_QNN-cs_8550: jp2k7d44p +esrgan_QNN-cs_x_elite: jpy142q7p +esrgan_QNN-cs_auto_lemans_8255: jp0z19d65 +esrgan_QNN-cs_auto_lemans_8775: jp8q3r6xp +esrgan_QNN-cs_auto_lemans_8650: jgkel0o2g +esrgan_QNN-cs_auto_makena_8295: j5q671z4p +esrgan_QNN-cs_xr_8450: jglv08o85 +esrgan_ONNX-cs_8_gen_2: j56y3mr0p +esrgan_ONNX-cs_8_gen_3: jp3j47xlg +esrgan_ONNX-cs_8_elite: jgo21woxp +esrgan_ONNX-cs_x_elite: jpv61mej5 +esrgan_ONNX_DML_GPU-cs_x_elite: jgjv0yoxg +esrgan_ONNX_DML_NPU-cs_x_elite: jpedrx815 +efficientvit_l2_cls_TFLITE-cs_8_gen_2: jp14d1v2p +efficientvit_l2_cls_TFLITE-cs_8_gen_3: jgdxr4zep +efficientvit_l2_cls_TFLITE-cs_8_elite: j57yjn7l5 +efficientvit_l2_cls_TFLITE-cs_8550: jp4lx49v5 +efficientvit_l2_cls_TFLITE-cs_auto_lemans_8255: jpxk7rd15 +efficientvit_l2_cls_TFLITE-cs_auto_lemans_8775: j5mnwkdwp +efficientvit_l2_cls_TFLITE-cs_auto_lemans_8650: jgn69q7r5 +efficientvit_l2_cls_TFLITE-cs_auto_makena_8295: jprv4dn9g +efficientvit_l2_cls_TFLITE-cs_xr_8450: jp2k7dv4p +efficientvit_l2_cls_QNN-cs_8_gen_2: jpy14277p +efficientvit_l2_cls_QNN-cs_8_gen_3: jp0z19v65 +efficientvit_l2_cls_QNN-cs_8_elite: jp8q3r4xp +efficientvit_l2_cls_QNN-cs_8550: jgkel092g +efficientvit_l2_cls_QNN-cs_x_elite: j5q671m4p +efficientvit_l2_cls_QNN-cs_auto_lemans_8255: jglv08185 +efficientvit_l2_cls_QNN-cs_auto_lemans_8775: j56y3md0p +efficientvit_l2_cls_QNN-cs_auto_lemans_8650: jp3j47wlg +efficientvit_l2_cls_QNN-cs_auto_makena_8295: jgo21w4xp +efficientvit_l2_cls_QNN-cs_xr_8450: jpv61m9j5 +efficientvit_l2_cls_ONNX-cs_8_gen_2: jgjv0ywxg 
+efficientvit_l2_cls_ONNX-cs_8_gen_3: jpedrxl15 +efficientvit_l2_cls_ONNX-cs_8_elite: jgz3xy4k5 +efficientvit_l2_cls_ONNX-cs_x_elite: j5wedz465 +efficientvit_l2_cls_ONNX_DML_GPU-cs_x_elite: jg9l32dlg +efficientvit_l2_cls_ONNX_DML_NPU-cs_x_elite: jp14d162p +efficientvit_b2_cls_TFLITE-cs_8_gen_2: jpxk7rx15 +efficientvit_b2_cls_TFLITE-cs_8_gen_3: j5mnwk8wp +efficientvit_b2_cls_TFLITE-cs_8_elite: jgn69qkr5 +efficientvit_b2_cls_TFLITE-cs_8550: jprv4dw9g +efficientvit_b2_cls_TFLITE-cs_auto_lemans_8255: jp2k7de4p +efficientvit_b2_cls_TFLITE-cs_auto_lemans_8775: jpy142m7p +efficientvit_b2_cls_TFLITE-cs_auto_lemans_8650: jp0z19665 +efficientvit_b2_cls_TFLITE-cs_auto_makena_8295: jp8q3r1xp +efficientvit_b2_cls_TFLITE-cs_xr_8450: jgkel082g +efficientvit_b2_cls_QNN-cs_8_gen_2: j5q671v4p +efficientvit_b2_cls_QNN-cs_8_gen_3: jglv08l85 +efficientvit_b2_cls_QNN-cs_8_elite: j56y3mw0p +efficientvit_b2_cls_QNN-cs_8550: jp3j476lg +efficientvit_b2_cls_QNN-cs_x_elite: jgo21w8xp +efficientvit_b2_cls_QNN-cs_auto_lemans_8255: jpv61m7j5 +efficientvit_b2_cls_QNN-cs_auto_lemans_8775: jgjv0yqxg +efficientvit_b2_cls_QNN-cs_auto_lemans_8650: jpedrxy15 +efficientvit_b2_cls_QNN-cs_auto_makena_8295: jgz3xynk5 +efficientvit_b2_cls_QNN-cs_xr_8450: j5wedz765 +efficientvit_b2_cls_ONNX-cs_8_gen_2: jg9l32mlg +efficientvit_b2_cls_ONNX-cs_8_gen_3: jp14d1j2p +efficientvit_b2_cls_ONNX-cs_8_elite: jgdxr43ep +efficientvit_b2_cls_ONNX-cs_x_elite: j57yjn4l5 +efficientvit_b2_cls_ONNX_DML_GPU-cs_x_elite: jp4lx41v5 +efficientvit_b2_cls_ONNX_DML_NPU-cs_x_elite: jpxk7r415 +efficientnet_b4_TFLITE-cs_8_gen_2: jp2k7dw4p +efficientnet_b4_TFLITE-cs_8_gen_3: jpy142x7p +efficientnet_b4_TFLITE-cs_8_elite: jp0z19j65 +efficientnet_b4_TFLITE-cs_8550: jp8q3rxxp +efficientnet_b4_TFLITE-cs_auto_lemans_8255: jgkel042g +efficientnet_b4_TFLITE-cs_auto_lemans_8775: j5q671y4p +efficientnet_b4_TFLITE-cs_auto_lemans_8650: jglv08x85 +efficientnet_b4_TFLITE-cs_auto_makena_8295: j56y3m70p +efficientnet_b4_TFLITE-cs_xr_8450: jp3j479lg 
+efficientnet_b4_QNN-cs_8_gen_2: jgo21wrxp +efficientnet_b4_QNN-cs_8_gen_3: jpv61mdj5 +efficientnet_b4_QNN-cs_8_elite: jgjv0y7xg +efficientnet_b4_QNN-cs_8550: jpedrxz15 +efficientnet_b4_QNN-cs_x_elite: jgz3xymk5 +efficientnet_b4_QNN-cs_auto_lemans_8255: j5wedzl65 +efficientnet_b4_QNN-cs_auto_lemans_8775: jg9l32zlg +efficientnet_b4_QNN-cs_auto_lemans_8650: jgdxr4dep +efficientnet_b4_QNN-cs_auto_makena_8295: j5wedzl35 +efficientnet_b4_QNN-cs_xr_8450: jg9l32zwg +efficientnet_b4_ONNX-cs_8_gen_2: jp14d1n8p +efficientnet_b4_ONNX-cs_8_gen_3: jgdxr4drp +efficientnet_b4_ONNX-cs_8_elite: j57yjnev5 +efficientnet_b4_ONNX-cs_x_elite: jp4lx4y85 +efficientnet_b4_ONNX_DML_GPU-cs_x_elite: jpxk7rl35 +efficientnet_b4_ONNX_DML_NPU-cs_x_elite: j5mnwk0dp +efficientnet_b0_TFLITE-cs_8_gen_2: jpy142o8p +efficientnet_b0_TFLITE-cs_8_gen_3: jp0z19m95 +efficientnet_b0_TFLITE-cs_8_elite: jp8q3rekp +efficientnet_b0_TFLITE-cs_8550: jgkel02wg +efficientnet_b0_TFLITE-cs_auto_lemans_8255: j5q671lnp +efficientnet_b0_TFLITE-cs_auto_lemans_8775: jglv08yj5 +efficientnet_b0_TFLITE-cs_auto_lemans_8650: j56y3m86p +efficientnet_b0_TFLITE-cs_auto_makena_8295: jp3j47z3g +efficientnet_b0_TFLITE-cs_xr_8450: jgo21wlqp +efficientnet_b0_QNN-cs_8_gen_2: jpv61m2k5 +efficientnet_b0_QNN-cs_8_gen_3: jgjv0y3vg +efficientnet_b0_QNN-cs_8_elite: jpedrx6o5 +efficientnet_b0_QNN-cs_8550: jgz3xyzo5 +efficientnet_b0_QNN-cs_x_elite: j5wedzy35 +efficientnet_b0_QNN-cs_auto_lemans_8255: jg9l32owg +efficientnet_b0_QNN-cs_auto_lemans_8775: jp14d1o8p +efficientnet_b0_QNN-cs_auto_lemans_8650: jgdxr46rp +efficientnet_b0_QNN-cs_auto_makena_8295: j57yjnov5 +efficientnet_b0_QNN-cs_xr_8450: jpxk7r035 +efficientnet_b0_ONNX-cs_8_gen_2: j5mnwk9dp +efficientnet_b0_ONNX-cs_8_gen_3: jgn69q1k5 +efficientnet_b0_ONNX-cs_8_elite: jprv4dx0g +efficientnet_b0_ONNX-cs_x_elite: jp2k7dorp +efficientnet_b0_ONNX_DML_GPU-cs_x_elite: jpy14288p +efficientnet_b0_ONNX_DML_NPU-cs_x_elite: jp0z19o95 +detr_resnet50_dc5_TFLITE-cs_8_gen_2: jglv08wj5 
+detr_resnet50_dc5_TFLITE-cs_8_gen_3: j56y3mo6p +detr_resnet50_dc5_TFLITE-cs_8_elite: jp3j47o3g +detr_resnet50_dc5_TFLITE-cs_8550: jgo21wdqp +detr_resnet50_dc5_TFLITE-cs_auto_lemans_8255: jpv61mmk5 +detr_resnet50_dc5_TFLITE-cs_auto_lemans_8775: jgjv0yyvg +detr_resnet50_dc5_TFLITE-cs_auto_lemans_8650: jpedrxxo5 +detr_resnet50_dc5_TFLITE-cs_auto_makena_8295: jgz3xyyo5 +detr_resnet50_dc5_TFLITE-cs_xr_8450: j5wedzz35 +detr_resnet50_dc5_QNN-cs_8_gen_2: jg9l322wg +detr_resnet50_dc5_QNN-cs_8_gen_3: jp14d118p +detr_resnet50_dc5_QNN-cs_8_elite: jgdxr44rp +detr_resnet50_dc5_QNN-cs_8550: j57yjnnv5 +detr_resnet50_dc5_QNN-cs_x_elite: jp4lx4485 +detr_resnet50_dc5_QNN-cs_auto_lemans_8255: jpxk7rr35 +detr_resnet50_dc5_QNN-cs_auto_lemans_8775: j5mnwkkdp +detr_resnet50_dc5_QNN-cs_auto_lemans_8650: jgn69qqk5 +detr_resnet50_dc5_QNN-cs_auto_makena_8295: jprv4dd0g +detr_resnet50_dc5_QNN-cs_xr_8450: jp2k7ddrp +detr_resnet50_dc5_ONNX-cs_8_gen_2: jpy14228p +detr_resnet50_dc5_ONNX-cs_8_gen_3: jp0z19995 +detr_resnet50_dc5_ONNX-cs_8_elite: jp8q3rrkp +detr_resnet50_dc5_ONNX-cs_x_elite: jgkel00wg +detr_resnet50_dc5_ONNX_DML_GPU-cs_x_elite: j5q6711np +detr_resnet50_dc5_ONNX_DML_NPU-cs_x_elite: jglv088j5 +detr_resnet50_TFLITE-cs_8_gen_2: jgo21wwqp +detr_resnet50_TFLITE-cs_8_gen_3: jpv61mnk5 +detr_resnet50_TFLITE-cs_8_elite: jgjv0y8vg +detr_resnet50_TFLITE-cs_8550: jpedrxno5 +detr_resnet50_TFLITE-cs_auto_lemans_8255: jgz3xy0o5 +detr_resnet50_TFLITE-cs_auto_lemans_8775: j5wedzr35 +detr_resnet50_TFLITE-cs_auto_lemans_8650: jg9l32qwg +detr_resnet50_TFLITE-cs_auto_makena_8295: jp14d1m8p +detr_resnet50_TFLITE-cs_xr_8450: jgdxr4mrp +detr_resnet50_QNN-cs_8_gen_2: j57yjn8v5 +detr_resnet50_QNN-cs_8_gen_3: jp4lx4285 +detr_resnet50_QNN-cs_8_elite: jpxk7rz35 +detr_resnet50_QNN-cs_8550: j5mnwkldp +detr_resnet50_QNN-cs_x_elite: jgn69qwk5 +detr_resnet50_QNN-cs_auto_lemans_8255: jprv4d70g +detr_resnet50_QNN-cs_auto_lemans_8775: jp2k7dzrp +detr_resnet50_QNN-cs_auto_lemans_8650: jpy142y8p 
+detr_resnet50_QNN-cs_auto_makena_8295: jp0z19x95 +detr_resnet50_QNN-cs_xr_8450: jp8q3rkkp +detr_resnet50_ONNX-cs_8_gen_2: jgkel0kwg +detr_resnet50_ONNX-cs_8_gen_3: j5q671dnp +detr_resnet50_ONNX-cs_8_elite: jglv08qj5 +detr_resnet50_ONNX-cs_x_elite: j56y3m06p +detr_resnet50_ONNX_DML_GPU-cs_x_elite: jp3j47r3g +detr_resnet50_ONNX_DML_NPU-cs_x_elite: jgo21w9qp +detr_resnet101_dc5_TFLITE-cs_8_gen_2: jpedrx0o5 +detr_resnet101_dc5_TFLITE-cs_8_gen_3: jgz3xyqo5 +detr_resnet101_dc5_TFLITE-cs_8_elite: j5wedz035 +detr_resnet101_dc5_TFLITE-cs_8550: jg9l327wg +detr_resnet101_dc5_TFLITE-cs_auto_lemans_8255: jp14d1k8p +detr_resnet101_dc5_TFLITE-cs_auto_lemans_8775: jgdxr4yrp +detr_resnet101_dc5_TFLITE-cs_auto_lemans_8650: j5wedz0m5 +detr_resnet101_dc5_TFLITE-cs_auto_makena_8295: jg9l3278g +detr_resnet101_dc5_TFLITE-cs_xr_8450: jp14d1k7p +detr_resnet101_dc5_QNN-cs_8_gen_2: jgdxr4yzp +detr_resnet101_dc5_QNN-cs_8_gen_3: j57yjn195 +detr_resnet101_dc5_QNN-cs_8_elite: jp4lx4615 +detr_resnet101_dc5_QNN-cs_8550: jpxk7r8l5 +detr_resnet101_dc5_QNN-cs_x_elite: j5mnwk19p +detr_resnet101_dc5_QNN-cs_auto_lemans_8255: jgn69qdq5 +detr_resnet101_dc5_QNN-cs_auto_lemans_8775: jprv4dm7g +detr_resnet101_dc5_QNN-cs_auto_lemans_8650: jp2k7dqqp +detr_resnet101_dc5_QNN-cs_auto_makena_8295: jpy142klp +detr_resnet101_dc5_QNN-cs_xr_8450: jp0z198n5 +detr_resnet101_dc5_ONNX-cs_8_gen_2: jp8q3rdop +detr_resnet101_dc5_ONNX-cs_8_gen_3: jgkel0wng +detr_resnet101_dc5_ONNX-cs_8_elite: j5q671xop +detr_resnet101_dc5_ONNX-cs_x_elite: jglv08dm5 +detr_resnet101_dc5_ONNX_DML_GPU-cs_x_elite: j56y3mxyp +detr_resnet101_dc5_ONNX_DML_NPU-cs_x_elite: jp3j47dng +detr_resnet101_TFLITE-cs_8_gen_2: jgjv0y9eg +detr_resnet101_TFLITE-cs_8_gen_3: jpedrxqv5 +detr_resnet101_TFLITE-cs_8_elite: jgz3xy6x5 +detr_resnet101_TFLITE-cs_8550: j5wedzkm5 +detr_resnet101_TFLITE-cs_auto_lemans_8255: jg9l32r8g +detr_resnet101_TFLITE-cs_auto_lemans_8775: jp14d197p +detr_resnet101_TFLITE-cs_auto_lemans_8650: jgdxr4kzp 
+detr_resnet101_TFLITE-cs_auto_makena_8295: j57yjnm95 +detr_resnet101_TFLITE-cs_xr_8450: jp4lx4715 +detr_resnet101_QNN-cs_8_gen_2: jpxk7rql5 +detr_resnet101_QNN-cs_8_gen_3: j5mnwk79p +detr_resnet101_QNN-cs_8_elite: jgn69q4q5 +detr_resnet101_QNN-cs_8550: jprv4dr7g +detr_resnet101_QNN-cs_x_elite: jp2k7d1qp +detr_resnet101_QNN-cs_auto_lemans_8255: jpy142llp +detr_resnet101_QNN-cs_auto_lemans_8775: jp0z19wn5 +detr_resnet101_QNN-cs_auto_lemans_8650: jp8q3rnop +detr_resnet101_QNN-cs_auto_makena_8295: jgkel01ng +detr_resnet101_QNN-cs_xr_8450: j5q671nop +detr_resnet101_ONNX-cs_8_gen_2: jglv08jm5 +detr_resnet101_ONNX-cs_8_gen_3: j56y3mkyp +detr_resnet101_ONNX-cs_8_elite: jp3j47yng +detr_resnet101_ONNX-cs_x_elite: jgo21wjkp +detr_resnet101_ONNX_DML_GPU-cs_x_elite: jpv61mjr5 +detr_resnet101_ONNX_DML_NPU-cs_x_elite: jgjv0yjeg +densenet121_TFLITE-cs_8_gen_2: j5wedzjm5 +densenet121_TFLITE-cs_8_gen_3: jg9l3268g +densenet121_TFLITE-cs_8_elite: jp14d1r7p +densenet121_TFLITE-cs_8550: jgdxr4jzp +densenet121_TFLITE-cs_auto_lemans_8255: j57yjnq95 +densenet121_TFLITE-cs_auto_lemans_8775: jp4lx4z15 +densenet121_TFLITE-cs_auto_lemans_8650: jpxk7rwl5 +densenet121_TFLITE-cs_auto_makena_8295: j5mnwkj9p +densenet121_TFLITE-cs_xr_8450: jgn69qjq5 +densenet121_QNN-cs_8_gen_2: jprv4dz7g +densenet121_QNN-cs_8_gen_3: jp2k7d2qp +densenet121_QNN-cs_8_elite: jpy1429lp +densenet121_QNN-cs_8550: jp0z19nn5 +densenet121_QNN-cs_x_elite: jp8q3rlop +densenet121_QNN-cs_auto_lemans_8255: jgkel0jng +densenet121_QNN-cs_auto_lemans_8775: j5q671jop +densenet121_QNN-cs_auto_lemans_8650: jglv08nm5 +densenet121_QNN-cs_auto_makena_8295: j56y3m6yp +densenet121_QNN-cs_xr_8450: jp3j47kng +densenet121_ONNX-cs_8_gen_2: jgo21wykp +densenet121_ONNX-cs_8_gen_3: jpv61m3r5 +densenet121_ONNX-cs_8_elite: jgjv0yxeg +densenet121_ONNX-cs_x_elite: jpedrx9v5 +densenet121_ONNX_DML_GPU-cs_x_elite: jgz3xyex5 +densenet121_ONNX_DML_NPU-cs_x_elite: j5wedzom5 +deeplabv3_resnet50_TFLITE-cs_8_gen_2: j57yjnz95 
+deeplabv3_resnet50_TFLITE-cs_8_gen_3: jp4lx4q15 +deeplabv3_resnet50_TFLITE-cs_8_elite: jpxk7rvl5 +deeplabv3_resnet50_TFLITE-cs_8550: j5mnwkr9p +deeplabv3_resnet50_TFLITE-cs_auto_lemans_8255: jpv61jlj5 +deeplabv3_resnet50_TFLITE-cs_auto_lemans_8775: jprv4dk7g +deeplabv3_resnet50_TFLITE-cs_auto_lemans_8650: jp2k7d8qp +deeplabv3_resnet50_TFLITE-cs_auto_makena_8295: jpy142elp +deeplabv3_resnet50_TFLITE-cs_xr_8450: jp0z19yn5 +deeplabv3_resnet50_QNN-cs_8_gen_2: jp8q3roop +deeplabv3_resnet50_QNN-cs_8_gen_3: jgkel0zng +deeplabv3_resnet50_QNN-cs_8_elite: j5q6718op +deeplabv3_resnet50_QNN-cs_auto_makena_8295: j56y3mjyp +deeplabv3_resnet50_QNN-cs_xr_8450: jp3j473ng +deeplabv3_resnet50_ONNX-cs_8_gen_2: jgo21w0kp +deeplabv3_resnet50_ONNX-cs_8_gen_3: jpv61mor5 +deeplabv3_resnet50_ONNX-cs_8_elite: jgjv0ymeg +deeplabv3_resnet50_ONNX-cs_x_elite: jpedrx1v5 +deeplabv3_resnet50_ONNX_DML_GPU-cs_x_elite: jgz3xy9x5 +deeplabv3_resnet50_ONNX_DML_NPU-cs_x_elite: j5wedzvm5 +deeplabv3_plus_mobilenet_TFLITE-cs_8_gen_2: jp14d1l7p +deeplabv3_plus_mobilenet_TFLITE-cs_8_gen_3: jgdxr49zp +deeplabv3_plus_mobilenet_TFLITE-cs_8_elite: j5wedzv45 +deeplabv3_plus_mobilenet_TFLITE-cs_8550: jg9l321mg +deeplabv3_plus_mobilenet_TFLITE-cs_auto_lemans_8255: jp14d1lnp +deeplabv3_plus_mobilenet_TFLITE-cs_auto_lemans_8775: jgdxr496p +deeplabv3_plus_mobilenet_TFLITE-cs_auto_lemans_8650: j57yjnwn5 +deeplabv3_plus_mobilenet_TFLITE-cs_auto_makena_8295: jp4lx4o25 +deeplabv3_plus_mobilenet_TFLITE-cs_xr_8450: jpxk7rj85 +deeplabv3_plus_mobilenet_QNN-cs_8_gen_2: j5mnwk27p +deeplabv3_plus_mobilenet_QNN-cs_8_gen_3: jgn69qyj5 +deeplabv3_plus_mobilenet_QNN-cs_8_elite: jprv4dqkg +deeplabv3_plus_mobilenet_QNN-cs_8550: jp2k7d66p +deeplabv3_plus_mobilenet_QNN-cs_x_elite: jpy142w0p +deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8255: jp0z19705 +deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8775: jp8q3rvqp +deeplabv3_plus_mobilenet_QNN-cs_auto_lemans_8650: jgkel0mvg +deeplabv3_plus_mobilenet_QNN-cs_auto_makena_8295: j5q671oep 
+deeplabv3_plus_mobilenet_QNN-cs_xr_8450: jglv08r25 +deeplabv3_plus_mobilenet_ONNX-cs_8_gen_2: j56y3mlnp +deeplabv3_plus_mobilenet_ONNX-cs_8_gen_3: jp3j472mg +deeplabv3_plus_mobilenet_ONNX-cs_8_elite: jgo21wq1p +deeplabv3_plus_mobilenet_ONNX-cs_x_elite: jpv61mxz5 +deeplabv3_plus_mobilenet_ONNX_DML_GPU-cs_x_elite: jgjv0y41g +deeplabv3_plus_mobilenet_ONNX_DML_NPU-cs_x_elite: jpedrx385 +ddrnet23_slim_TFLITE-cs_8_gen_2: jp14d1xnp +ddrnet23_slim_TFLITE-cs_8_gen_3: jgdxr4l6p +ddrnet23_slim_TFLITE-cs_8_elite: j57yjn3n5 +ddrnet23_slim_TFLITE-cs_8550: jp4lx4025 +ddrnet23_slim_TFLITE-cs_auto_lemans_8255: jpxk7r285 +ddrnet23_slim_TFLITE-cs_auto_lemans_8775: j5mnwky7p +ddrnet23_slim_TFLITE-cs_auto_lemans_8650: jgn69q8j5 +ddrnet23_slim_TFLITE-cs_auto_makena_8295: jprv4djkg +ddrnet23_slim_TFLITE-cs_xr_8450: jp2k7dn6p +ddrnet23_slim_QNN-cs_8_gen_2: jpy14200p +ddrnet23_slim_QNN-cs_8_gen_3: jp0z1x005 +ddrnet23_slim_QNN-cs_8_elite: jp8q3kyqp +ddrnet23_slim_QNN-cs_8550: jgkelkxvg +ddrnet23_slim_QNN-cs_x_elite: j5q67dqep +ddrnet23_slim_QNN-cs_auto_lemans_8255: jglv0qm25 +ddrnet23_slim_QNN-cs_auto_lemans_8775: j56y304np +ddrnet23_slim_QNN-cs_auto_lemans_8650: jp3j4r0mg +ddrnet23_slim_QNN-cs_auto_makena_8295: jgo21961p +ddrnet23_slim_QNN-cs_xr_8450: jpv61nkz5 +ddrnet23_slim_ONNX-cs_8_gen_2: jgjv08n1g +ddrnet23_slim_ONNX-cs_8_gen_3: jpedrnm85 +ddrnet23_slim_ONNX-cs_8_elite: jgz3x0d45 +ddrnet23_slim_ONNX-cs_x_elite: j5wedr645 +ddrnet23_slim_ONNX_DML_GPU-cs_x_elite: jg9l3qnmg +ddrnet23_slim_ONNX_DML_NPU-cs_x_elite: jp14dmznp +convnext_tiny_w8a16_quantized_QNN-cs_8_gen_2: jp4lx2r25 +convnext_tiny_w8a16_quantized_QNN-cs_8_gen_3: jpxk7zo85 +convnext_tiny_w8a16_quantized_QNN-cs_8_elite: j5mnwlx7p +convnext_tiny_w8a16_quantized_QNN-cs_6490: jgn69wvj5 +convnext_tiny_w8a16_quantized_QNN-cs_8550: jprv473kg +convnext_tiny_w8a16_quantized_QNN-cs_x_elite: jp2k7zy6p +convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8255: jpy14y30p +convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8775: jp0z1xz05 
+convnext_tiny_w8a16_quantized_QNN-cs_auto_lemans_8650: jp8q3kqqp +convnext_tiny_w8a16_quantized_QNN-cs_auto_makena_8295: jgkelkevg +convnext_tiny_w8a16_quantized_QNN-cs_xr_8450: j5q67d6ep +convnext_tiny_w8a16_quantized_ONNX-cs_8_gen_2: jglv0qv25 +convnext_tiny_w8a16_quantized_ONNX-cs_8_gen_3: j56y30ynp +convnext_tiny_w8a16_quantized_ONNX-cs_8_elite: jp3j4rjmg +convnext_tiny_w8a16_quantized_ONNX-cs_x_elite: jgo21921p +convnext_tiny_w8a16_quantized_ONNX_DML_NPU-cs_x_elite: jpv61n6z5 +convnext_tiny_TFLITE-cs_8_gen_2: jpedrnd85 +convnext_tiny_TFLITE-cs_8_gen_3: jgz3x0345 +convnext_tiny_TFLITE-cs_8_elite: j5wedre45 +convnext_tiny_TFLITE-cs_8550: jg9l3qlmg +convnext_tiny_TFLITE-cs_auto_lemans_8255: jp14dm4np +convnext_tiny_TFLITE-cs_auto_lemans_8775: jgdxrmx6p +convnext_tiny_TFLITE-cs_auto_lemans_8650: j57yj8yn5 +convnext_tiny_TFLITE-cs_auto_makena_8295: jp4lx2l25 +convnext_tiny_TFLITE-cs_xr_8450: jpxk7zk85 +convnext_tiny_QNN-cs_8_gen_2: j5mnwln7p +convnext_tiny_QNN-cs_8_gen_3: jgn69w6j5 +convnext_tiny_QNN-cs_8_elite: jprv47vkg +convnext_tiny_QNN-cs_8550: jp2k7zk6p +convnext_tiny_QNN-cs_x_elite: jpy14y10p +convnext_tiny_QNN-cs_auto_lemans_8255: jp0z1x405 +convnext_tiny_QNN-cs_auto_lemans_8775: jp8q3k2qp +convnext_tiny_QNN-cs_auto_lemans_8650: jgkelkvvg +convnext_tiny_QNN-cs_auto_makena_8295: j5q67d0ep +convnext_tiny_QNN-cs_xr_8450: jglv0q425 +convnext_tiny_ONNX-cs_8_gen_2: j56y302np +convnext_tiny_ONNX-cs_8_gen_3: jp3j4rnmg +convnext_tiny_ONNX-cs_8_elite: jgo219z1p +convnext_tiny_ONNX-cs_x_elite: jpv61nqz5 +convnext_tiny_ONNX_DML_GPU-cs_x_elite: jgjv08d1g +convnext_tiny_ONNX_DML_NPU-cs_x_elite: jpedrno85 +aotgan_TFLITE-cs_8_gen_2: jp14dm2np +aotgan_TFLITE-cs_8_gen_3: jgdxrmn6p +aotgan_TFLITE-cs_8_elite: j5wedrwz5 +aotgan_TFLITE-cs_8550: jg9l3q0qg +aotgan_TFLITE-cs_auto_lemans_8255: jp14dm2kp +aotgan_TFLITE-cs_auto_lemans_8775: jgdxrmnkp +aotgan_TFLITE-cs_auto_lemans_8650: j57yj82q5 +aotgan_TFLITE-cs_auto_makena_8295: jp4lx2nq5 +aotgan_TFLITE-cs_xr_8450: jpxk7z9j5 
+aotgan_QNN-cs_8_gen_2: j5mnwleyp +aotgan_QNN-cs_8_gen_3: jgn69wlv5 +aotgan_QNN-cs_8_elite: jprv478vg +aotgan_QNN-cs_8550: jp2k7z0xp +aotgan_QNN-cs_x_elite: jpy14yrrp +aotgan_QNN-cs_auto_lemans_8255: jp0z1x325 +aotgan_QNN-cs_auto_lemans_8775: jp8q3k0zp +aotgan_QNN-cs_auto_lemans_8650: jgkelk7yg +aotgan_QNN-cs_auto_makena_8295: j5q67de7p +aotgan_QNN-cs_xr_8450: jglv0q6e5 +aotgan_ONNX-cs_8_gen_2: j56y30evp +aotgan_ONNX-cs_8_gen_3: jp3j4rvxg +aotgan_ONNX-cs_8_elite: jgo219k4p +aotgan_ONNX-cs_x_elite: jpv61n075 +aotgan_ONNX_DML_GPU-cs_x_elite: jgjv08z7g +aotgan_ONNX_DML_NPU-cs_x_elite: jpedrne75 +xlsr_quantized_TFLITE-cs_8_gen_2: jg9l3qjqg +xlsr_quantized_TFLITE-cs_8_gen_3: jp14dmykp +xlsr_quantized_TFLITE-cs_8_elite: jgdxrmekp +xlsr_quantized_TFLITE-cs_6490: j57yj80q5 +xlsr_quantized_TFLITE-cs_8250: jp4lx2kq5 +xlsr_quantized_TFLITE-cs_8550: jpxk7znj5 +xlsr_quantized_TFLITE-cs_auto_lemans_8255: j5mnwlqyp +xlsr_quantized_TFLITE-cs_auto_lemans_8775: jgn69wmv5 +xlsr_quantized_TFLITE-cs_auto_lemans_8650: jprv472vg +xlsr_quantized_TFLITE-cs_auto_makena_8295: jp2k7z9xp +xlsr_quantized_TFLITE-cs_xr_8450: jpy14yjrp +xlsr_quantized_QNN-cs_8_gen_2: jp0z1x225 +xlsr_quantized_QNN-cs_8_gen_3: jp8q3kmzp +xlsr_quantized_QNN-cs_8_elite: jgkelkqyg +xlsr_quantized_QNN-cs_6490: j5q67dr7p +xlsr_quantized_QNN-cs_8550: jglv0q2e5 +xlsr_quantized_QNN-cs_x_elite: j56y30zvp +xlsr_quantized_QNN-cs_auto_lemans_8255: jp3j4r1xg +xlsr_quantized_QNN-cs_auto_lemans_8775: jgo219n4p +xlsr_quantized_QNN-cs_auto_lemans_8650: jpv61nr75 +xlsr_quantized_QNN-cs_auto_makena_8295: jgjv0827g +xlsr_quantized_QNN-cs_xr_8450: jpedrnw75 +xlsr_quantized_ONNX-cs_8_gen_2: j5wedr3z5 +xlsr_quantized_ONNX-cs_8_gen_3: jg9l3qyqg +xlsr_quantized_ONNX-cs_8_elite: jp14dmwkp +xlsr_quantized_ONNX-cs_x_elite: jgdxrmqkp +xlsr_quantized_ONNX_DML_NPU-cs_x_elite: j57yj8lq5 +wideresnet50_quantized_TFLITE-cs_8_gen_2: jgn69w3v5 +wideresnet50_quantized_TFLITE-cs_8_gen_3: jprv47evg +wideresnet50_quantized_TFLITE-cs_8_elite: jp2k7zlxp 
+wideresnet50_quantized_TFLITE-cs_6490: jpy14y6rp +wideresnet50_quantized_TFLITE-cs_8250: jp0z1xl25 +wideresnet50_quantized_TFLITE-cs_8550: jp8q3kzzp +wideresnet50_quantized_TFLITE-cs_auto_lemans_8255: jgkelk3yg +wideresnet50_quantized_TFLITE-cs_auto_lemans_8775: j5q67d37p +wideresnet50_quantized_TFLITE-cs_auto_lemans_8650: jglv0q3e5 +wideresnet50_quantized_TFLITE-cs_auto_makena_8295: j56y30nvp +wideresnet50_quantized_TFLITE-cs_xr_8450: jp3j4rexg +wideresnet50_quantized_QNN-cs_8_gen_2: jgo21934p +wideresnet50_quantized_QNN-cs_8_gen_3: jpv61nv75 +wideresnet50_quantized_QNN-cs_8_elite: jgjv08e7g +wideresnet50_quantized_QNN-cs_6490: jpedrnk75 +wideresnet50_quantized_QNN-cs_8550: jgz3x0rz5 +wideresnet50_quantized_QNN-cs_x_elite: j5wedrqz5 +wideresnet50_quantized_QNN-cs_auto_lemans_8255: jg9l3qwqg +wideresnet50_quantized_QNN-cs_auto_lemans_8775: jp14dmekp +wideresnet50_quantized_QNN-cs_auto_lemans_8650: jgdxrmokp +wideresnet50_quantized_QNN-cs_auto_makena_8295: j57yj8xq5 +wideresnet50_quantized_QNN-cs_xr_8450: jp4lx2vq5 +wideresnet50_quantized_ONNX-cs_8_gen_2: jpxk7zyj5 +wideresnet50_quantized_ONNX-cs_8_gen_3: j5mnwl3yp +wideresnet50_quantized_ONNX-cs_8_elite: jgn69wev5 +wideresnet50_quantized_ONNX-cs_x_elite: jprv47yvg +wideresnet50_quantized_ONNX_DML_NPU-cs_x_elite: jp2k7zmxp +vit_quantized_TFLITE-cs_8_gen_2: jgkelkyyg +vit_quantized_TFLITE-cs_8_gen_3: j5q67d27p +vit_quantized_TFLITE-cs_8_elite: jglv0qke5 +vit_quantized_TFLITE-cs_6490: j56y301vp +vit_quantized_TFLITE-cs_8250: jp3j4rmxg +vit_quantized_TFLITE-cs_8550: jgo219v4p +vit_quantized_TFLITE-cs_auto_lemans_8255: jpv61nw75 +vit_quantized_TFLITE-cs_auto_lemans_8775: jgjv08l7g +vit_quantized_TFLITE-cs_auto_lemans_8650: jpedrnv75 +vit_quantized_TFLITE-cs_auto_makena_8295: jgz3x07z5 +vit_quantized_TFLITE-cs_xr_8450: j5wedr9z5 +vit_quantized_QNN-cs_8_gen_2: jg9l3q4qg +vit_quantized_QNN-cs_8_gen_3: jp14dm8kp +vit_quantized_QNN-cs_8_elite: jgdxrmvkp +vit_quantized_QNN-cs_6490: j5wedr9j5 +vit_quantized_QNN-cs_8550: 
jg9l3q4vg +vit_quantized_QNN-cs_x_elite: jp14dm8lp +vit_quantized_QNN-cs_auto_lemans_8255: jgdxrmvlp +vit_quantized_QNN-cs_auto_lemans_8775: j57yj8jr5 +vit_quantized_QNN-cs_auto_lemans_8650: jp4lx2xl5 +vit_quantized_QNN-cs_auto_makena_8295: jpxk7z795 +vit_quantized_QNN-cs_xr_8450: j5mnwlwqp +vit_quantized_ONNX-cs_8_gen_2: jgn69w9m5 +vit_quantized_ONNX-cs_8_gen_3: jprv474eg +vit_quantized_ONNX-cs_8_elite: jp2k7z7mp +vit_quantized_ONNX-cs_x_elite: jpy14y44p +vit_quantized_ONNX_DML_NPU-cs_x_elite: jp0z1x1e5 +squeezenet1_1_quantized_TFLITE-cs_8_gen_2: j5q67d7mp +squeezenet1_1_quantized_TFLITE-cs_8_gen_3: jglv0q0l5 +squeezenet1_1_quantized_TFLITE-cs_8_elite: j56y3037p +squeezenet1_1_quantized_TFLITE-cs_6490: jp3j4r4zg +squeezenet1_1_quantized_TFLITE-cs_8250: jgo2191dp +squeezenet1_1_quantized_TFLITE-cs_8550: jpv61n1m5 +squeezenet1_1_quantized_TFLITE-cs_auto_lemans_8255: jgjv0808g +squeezenet1_1_quantized_TFLITE-cs_auto_lemans_8775: jgz3x0x65 +squeezenet1_1_quantized_TFLITE-cs_auto_lemans_8650: j5wedrdj5 +squeezenet1_1_quantized_TFLITE-cs_auto_makena_8295: jg9l3q3vg +squeezenet1_1_quantized_TFLITE-cs_xr_8450: jp14dmdlp +squeezenet1_1_quantized_QNN-cs_8_gen_2: jgdxrmrlp +squeezenet1_1_quantized_QNN-cs_8_gen_3: j57yj8vr5 +squeezenet1_1_quantized_QNN-cs_8_elite: jp4lx2jl5 +squeezenet1_1_quantized_QNN-cs_6490: jpxk7ze95 +squeezenet1_1_quantized_QNN-cs_8550: j5mnwlvqp +squeezenet1_1_quantized_QNN-cs_x_elite: jgn69wrm5 +squeezenet1_1_quantized_QNN-cs_auto_lemans_8255: jprv471eg +squeezenet1_1_quantized_QNN-cs_auto_lemans_8775: jp2k7z3mp +squeezenet1_1_quantized_QNN-cs_auto_lemans_8650: jpy14yv4p +squeezenet1_1_quantized_QNN-cs_auto_makena_8295: jp8q3kw8p +squeezenet1_1_quantized_QNN-cs_xr_8450: jgkelkrog +squeezenet1_1_quantized_ONNX-cs_8_gen_2: j5q67d9mp +squeezenet1_1_quantized_ONNX-cs_8_gen_3: jglv0qel5 +squeezenet1_1_quantized_ONNX-cs_8_elite: j56y30q7p +squeezenet1_1_quantized_ONNX-cs_x_elite: jp3j4rqzg +squeezenet1_1_quantized_ONNX_DML_NPU-cs_x_elite: jgo219edp 
+shufflenet_v2_quantized_TFLITE-cs_8_gen_2: j5wedrmj5 +shufflenet_v2_quantized_TFLITE-cs_8_gen_3: jg9l3q9vg +shufflenet_v2_quantized_TFLITE-cs_8_elite: jp14dmqlp +shufflenet_v2_quantized_TFLITE-cs_6490: jgdxrm7lp +shufflenet_v2_quantized_TFLITE-cs_8250: j57yj86r5 +shufflenet_v2_quantized_TFLITE-cs_8550: jp4lx28l5 +shufflenet_v2_quantized_TFLITE-cs_auto_lemans_8255: jpxk7zm95 +shufflenet_v2_quantized_TFLITE-cs_auto_lemans_8775: j5mnwl4qp +shufflenet_v2_quantized_TFLITE-cs_auto_lemans_8650: jgn69wxm5 +shufflenet_v2_quantized_TFLITE-cs_auto_makena_8295: jprv479eg +shufflenet_v2_quantized_TFLITE-cs_xr_8450: jp2k7zjmp +shufflenet_v2_quantized_QNN-cs_8_gen_2: jpy14yn4p +shufflenet_v2_quantized_QNN-cs_8_gen_3: jp0z1xke5 +shufflenet_v2_quantized_QNN-cs_8_elite: jp8q3k88p +shufflenet_v2_quantized_QNN-cs_6490: jgkelkdog +shufflenet_v2_quantized_QNN-cs_8550: j5q67dwmp +shufflenet_v2_quantized_QNN-cs_x_elite: jglv0q7l5 +shufflenet_v2_quantized_QNN-cs_auto_lemans_8255: j56y30v7p +shufflenet_v2_quantized_QNN-cs_auto_lemans_8775: jp3j4r8zg +shufflenet_v2_quantized_QNN-cs_auto_lemans_8650: jgo219mdp +shufflenet_v2_quantized_QNN-cs_auto_makena_8295: jpv61n4m5 +shufflenet_v2_quantized_QNN-cs_xr_8450: jgjv0818g +shufflenet_v2_quantized_ONNX-cs_8_gen_2: jpedrn205 +shufflenet_v2_quantized_ONNX-cs_8_gen_3: jgz3x0w65 +shufflenet_v2_quantized_ONNX-cs_8_elite: j5wedrxj5 +shufflenet_v2_quantized_ONNX-cs_x_elite: jg9l3q8vg +shufflenet_v2_quantized_ONNX_DML_NPU-cs_x_elite: jp14dm3lp +sesr_m5_quantized_TFLITE-cs_8_gen_2: jpxk7z395 +sesr_m5_quantized_TFLITE-cs_8_gen_3: j5mnwloqp +sesr_m5_quantized_TFLITE-cs_8_elite: jgn69wom5 +sesr_m5_quantized_TFLITE-cs_6490: jprv47oeg +sesr_m5_quantized_TFLITE-cs_8250: jp2k7z4mp +sesr_m5_quantized_TFLITE-cs_8550: jpy14yq4p +sesr_m5_quantized_TFLITE-cs_auto_lemans_8255: jp0z1xde5 +sesr_m5_quantized_TFLITE-cs_auto_lemans_8775: jp8q3k68p +sesr_m5_quantized_TFLITE-cs_auto_lemans_8650: jgkelkoog +sesr_m5_quantized_TFLITE-cs_auto_makena_8295: j5q67dzmp 
+sesr_m5_quantized_TFLITE-cs_xr_8450: jglv0qol5 +sesr_m5_quantized_QNN-cs_8_gen_2: jp3j4rxzg +sesr_m5_quantized_QNN-cs_8_gen_3: jgo219odp +sesr_m5_quantized_QNN-cs_8_elite: jpv61nem5 +sesr_m5_quantized_QNN-cs_6490: jgjv08o8g +sesr_m5_quantized_QNN-cs_8550: jpedrn805 +sesr_m5_quantized_QNN-cs_x_elite: jgz3x0865 +sesr_m5_quantized_QNN-cs_auto_lemans_8255: j5wedr8j5 +sesr_m5_quantized_QNN-cs_auto_lemans_8775: jg9l3qkvg +sesr_m5_quantized_QNN-cs_auto_lemans_8650: jp14dm7lp +sesr_m5_quantized_QNN-cs_auto_makena_8295: jgdxrm8lp +sesr_m5_quantized_QNN-cs_xr_8450: j5wedr165 +sesr_m5_quantized_ONNX-cs_8_gen_2: jg9l3qxlg +sesr_m5_quantized_ONNX-cs_8_gen_3: jp14dmv2p +sesr_m5_quantized_ONNX-cs_8_elite: jgdxrmzep +sesr_m5_quantized_ONNX-cs_x_elite: j57yj87l5 +sesr_m5_quantized_ONNX_DML_NPU-cs_x_elite: jp4lx29v5 +resnext50_quantized_TFLITE-cs_8_gen_2: jprv47n9g +resnext50_quantized_TFLITE-cs_8_gen_3: jp2k7zv4p +resnext50_quantized_TFLITE-cs_8_elite: jpy14y77p +resnext50_quantized_TFLITE-cs_6490: jp0z1xv65 +resnext50_quantized_TFLITE-cs_8250: jp8q3k4xp +resnext50_quantized_TFLITE-cs_8550: jgkelk92g +resnext50_quantized_TFLITE-cs_auto_lemans_8255: j5q67dm4p +resnext50_quantized_TFLITE-cs_auto_lemans_8775: jglv0q185 +resnext50_quantized_TFLITE-cs_auto_lemans_8650: j56y30d0p +resnext50_quantized_TFLITE-cs_auto_makena_8295: jp3j4rwlg +resnext50_quantized_TFLITE-cs_xr_8450: jgo2194xp +resnext50_quantized_QNN-cs_8_gen_2: jpv61n9j5 +resnext50_quantized_QNN-cs_8_gen_3: jgjv08wxg +resnext50_quantized_QNN-cs_8_elite: jpedrnl15 +resnext50_quantized_QNN-cs_6490: jgz3x04k5 +resnext50_quantized_QNN-cs_8550: j5wedr465 +resnext50_quantized_QNN-cs_x_elite: jg9l3qdlg +resnext50_quantized_QNN-cs_auto_lemans_8255: jp14dm62p +resnext50_quantized_QNN-cs_auto_lemans_8775: jgdxrm2ep +resnext50_quantized_QNN-cs_auto_lemans_8650: j57yj89l5 +resnext50_quantized_QNN-cs_auto_makena_8295: jp4lx23v5 +resnext50_quantized_QNN-cs_xr_8450: jpxk7zx15 +resnext50_quantized_ONNX-cs_8_gen_2: j5mnwl8wp 
+resnext50_quantized_ONNX-cs_8_gen_3: jgn69wkr5 +resnext50_quantized_ONNX-cs_8_elite: jprv47w9g +resnext50_quantized_ONNX-cs_x_elite: jp2k7ze4p +resnext50_quantized_ONNX_DML_NPU-cs_x_elite: jpy14ym7p +resnext101_quantized_TFLITE-cs_8_gen_2: j5q67dv4p +resnext101_quantized_TFLITE-cs_8_gen_3: jglv0ql85 +resnext101_quantized_TFLITE-cs_8_elite: j56y30w0p +resnext101_quantized_TFLITE-cs_6490: jp3j4r6lg +resnext101_quantized_TFLITE-cs_8250: jgo2198xp +resnext101_quantized_TFLITE-cs_8550: jpv61n7j5 +resnext101_quantized_TFLITE-cs_auto_lemans_8255: jgjv08qxg +resnext101_quantized_TFLITE-cs_auto_lemans_8775: jpedrny15 +resnext101_quantized_TFLITE-cs_auto_lemans_8650: jgz3x0nk5 +resnext101_quantized_TFLITE-cs_auto_makena_8295: j5wedr765 +resnext101_quantized_TFLITE-cs_xr_8450: jg9l3qmlg +resnext101_quantized_QNN-cs_8_gen_2: jp14dmj2p +resnext101_quantized_QNN-cs_8_gen_3: jgdxrm3ep +resnext101_quantized_QNN-cs_8_elite: j57yj84l5 +resnext101_quantized_QNN-cs_6490: jp4lx21v5 +resnext101_quantized_QNN-cs_8550: jpxk7z415 +resnext101_quantized_QNN-cs_x_elite: j5mnwlmwp +resnext101_quantized_QNN-cs_auto_lemans_8255: jgn69wnr5 +resnext101_quantized_QNN-cs_auto_lemans_8775: jprv4709g +resnext101_quantized_QNN-cs_auto_lemans_8650: jp2k7zw4p +resnext101_quantized_QNN-cs_auto_makena_8295: jpy14yx7p +resnext101_quantized_QNN-cs_xr_8450: jp0z1xj65 +resnext101_quantized_ONNX-cs_8_gen_2: jp8q3kxxp +resnext101_quantized_ONNX-cs_8_gen_3: jgkelk42g +resnext101_quantized_ONNX-cs_8_elite: j5q67dy4p +resnext101_quantized_ONNX-cs_x_elite: jglv0qx85 +resnext101_quantized_ONNX_DML_NPU-cs_x_elite: j56y3070p +resnet50_quantized_TFLITE-cs_8_gen_2: jgjv087xg +resnet50_quantized_TFLITE-cs_8_gen_3: jpedrnz15 +resnet50_quantized_TFLITE-cs_8_elite: jgz3x0mk5 +resnet50_quantized_TFLITE-cs_6490: j5wedrl65 +resnet50_quantized_TFLITE-cs_8250: jg9l3qzlg +resnet50_quantized_TFLITE-cs_8550: jp14dmn2p +resnet50_quantized_TFLITE-cs_auto_lemans_8255: jgdxrmdep +resnet50_quantized_TFLITE-cs_auto_lemans_8775: j57yj8el5 
+resnet50_quantized_TFLITE-cs_auto_lemans_8650: jp4lx2yv5 +resnet50_quantized_TFLITE-cs_auto_makena_8295: jpxk7zl15 +resnet50_quantized_TFLITE-cs_xr_8450: j5mnwl0wp +resnet50_quantized_QNN-cs_8_gen_2: jgn69wzr5 +resnet50_quantized_QNN-cs_8_gen_3: jprv47l9g +resnet50_quantized_QNN-cs_8_elite: jp2k7zr4p +resnet50_quantized_QNN-cs_6490: jpy14yo7p +resnet50_quantized_QNN-cs_8550: jp0z1xm65 +resnet50_quantized_QNN-cs_x_elite: jp8q3kexp +resnet50_quantized_QNN-cs_auto_lemans_8255: jgkelk22g +resnet50_quantized_QNN-cs_auto_lemans_8775: j5q67dl4p +resnet50_quantized_QNN-cs_auto_lemans_8650: jglv0qy85 +resnet50_quantized_QNN-cs_auto_makena_8295: j56y3080p +resnet50_quantized_QNN-cs_xr_8450: jp3j4rzlg +resnet50_quantized_ONNX-cs_8_gen_2: jgo219lxp +resnet50_quantized_ONNX-cs_8_gen_3: jpv61nlj5 +resnet50_quantized_ONNX-cs_8_elite: jgjv08rxg +resnet50_quantized_ONNX-cs_x_elite: jpedrn715 +resnet50_quantized_ONNX_DML_NPU-cs_x_elite: jgz3x0lk5 +resnet18_quantized_TFLITE-cs_8_gen_2: jgdxrm6ep +resnet18_quantized_TFLITE-cs_8_gen_3: j5wedry35 +resnet18_quantized_TFLITE-cs_8_elite: jg9l3qowg +resnet18_quantized_TFLITE-cs_6490: jp14dmo8p +resnet18_quantized_TFLITE-cs_8250: jgdxrm6rp +resnet18_quantized_TFLITE-cs_8550: j57yj8ov5 +resnet18_quantized_TFLITE-cs_auto_lemans_8255: jp4lx2e85 +resnet18_quantized_TFLITE-cs_auto_lemans_8775: jpxk7z035 +resnet18_quantized_TFLITE-cs_auto_lemans_8650: j5mnwl9dp +resnet18_quantized_TFLITE-cs_auto_makena_8295: jgn69w1k5 +resnet18_quantized_TFLITE-cs_xr_8450: jprv47x0g +resnet18_quantized_QNN-cs_8_gen_2: jp2k7zorp +resnet18_quantized_QNN-cs_8_gen_3: jpy14y88p +resnet18_quantized_QNN-cs_8_elite: jp8q3kjkp +resnet18_quantized_QNN-cs_6490: jgkelk6wg +resnet18_quantized_QNN-cs_8550: j5q67d4np +resnet18_quantized_QNN-cs_x_elite: jglv0qwj5 +resnet18_quantized_QNN-cs_auto_lemans_8255: j56y30o6p +resnet18_quantized_QNN-cs_auto_lemans_8775: jp3j4ro3g +resnet18_quantized_QNN-cs_auto_lemans_8650: jgo219dqp +resnet18_quantized_QNN-cs_auto_makena_8295: jpv61nmk5 
+resnet18_quantized_QNN-cs_xr_8450: jgjv08yvg +resnet18_quantized_ONNX-cs_8_gen_2: jpedrnxo5 +resnet18_quantized_ONNX-cs_8_gen_3: jgz3x0yo5 +resnet18_quantized_ONNX-cs_8_elite: j5wedrz35 +resnet18_quantized_ONNX-cs_x_elite: jg9l3q2wg +resnet18_quantized_ONNX_DML_NPU-cs_x_elite: jp14dm18p +resnet101_quantized_TFLITE-cs_8_gen_2: j5mnwlkdp +resnet101_quantized_TFLITE-cs_8_gen_3: jgn69wqk5 +resnet101_quantized_TFLITE-cs_8_elite: jp2k7zdrp +resnet101_quantized_TFLITE-cs_6490: jpy14y28p +resnet101_quantized_TFLITE-cs_8250: jp0z1x995 +resnet101_quantized_TFLITE-cs_8550: jp8q3krkp +resnet101_quantized_TFLITE-cs_auto_lemans_8255: jgkelk0wg +resnet101_quantized_TFLITE-cs_auto_lemans_8775: j5q67d1np +resnet101_quantized_TFLITE-cs_auto_lemans_8650: jglv0q8j5 +resnet101_quantized_TFLITE-cs_auto_makena_8295: j56y30m6p +resnet101_quantized_TFLITE-cs_xr_8450: jp3j4r73g +resnet101_quantized_QNN-cs_8_gen_2: jgo219wqp +resnet101_quantized_QNN-cs_8_gen_3: jpv61nnk5 +resnet101_quantized_QNN-cs_8_elite: jpxk7w015 +resnet101_quantized_QNN-cs_6490: jpedrnno5 +resnet101_quantized_QNN-cs_8550: jgz3x00o5 +resnet101_quantized_QNN-cs_x_elite: j5wedrr35 +resnet101_quantized_QNN-cs_auto_lemans_8255: jg9l3qqwg +resnet101_quantized_QNN-cs_auto_lemans_8775: jp14dmm8p +resnet101_quantized_QNN-cs_auto_lemans_8650: jgdxrmmrp +resnet101_quantized_QNN-cs_auto_makena_8295: j57yj88v5 +resnet101_quantized_QNN-cs_xr_8450: jp4lx2285 +resnet101_quantized_ONNX-cs_8_gen_2: jpxk7zz35 +resnet101_quantized_ONNX-cs_8_gen_3: j5mnwlldp +resnet101_quantized_ONNX-cs_8_elite: jgn69wwk5 +resnet101_quantized_ONNX-cs_x_elite: jprv4770g +resnet101_quantized_ONNX_DML_NPU-cs_x_elite: jp2k7zzrp +regnet_quantized_TFLITE-cs_8_gen_2: jgkelkkwg +regnet_quantized_TFLITE-cs_8_gen_3: j5q67ddnp +regnet_quantized_TFLITE-cs_8_elite: jglv0qqj5 +regnet_quantized_TFLITE-cs_6490: j56y3006p +regnet_quantized_TFLITE-cs_8250: jp3j4rr3g +regnet_quantized_TFLITE-cs_8550: jgo2199qp +regnet_quantized_TFLITE-cs_auto_lemans_8255: jpv61nyk5 
+regnet_quantized_TFLITE-cs_auto_lemans_8775: jgjv086vg +regnet_quantized_TFLITE-cs_auto_lemans_8650: jpedrn0o5 +regnet_quantized_TFLITE-cs_auto_makena_8295: jgz3x0qo5 +regnet_quantized_TFLITE-cs_xr_8450: j5wedr035 +regnet_quantized_QNN-cs_8_gen_2: jg9l3q7wg +regnet_quantized_QNN-cs_8_gen_3: jp14dmk8p +regnet_quantized_QNN-cs_8_elite: jgdxrmyrp +regnet_quantized_QNN-cs_6490: j57yj81v5 +regnet_quantized_QNN-cs_8550: jp4lx2685 +regnet_quantized_QNN-cs_x_elite: jpxk7z835 +regnet_quantized_QNN-cs_auto_lemans_8255: j5mnwl1dp +regnet_quantized_QNN-cs_auto_lemans_8775: jgn69wdk5 +regnet_quantized_QNN-cs_auto_lemans_8650: jprv47m0g +regnet_quantized_QNN-cs_auto_makena_8295: jp2k7zqrp +regnet_quantized_QNN-cs_xr_8450: jpy14yk8p +regnet_quantized_ONNX-cs_8_gen_2: jp0z1x895 +regnet_quantized_ONNX-cs_8_gen_3: jp8q3kdkp +regnet_quantized_ONNX-cs_8_elite: jgkelkwwg +regnet_quantized_ONNX-cs_x_elite: j5q67dxnp +regnet_quantized_ONNX_DML_NPU-cs_x_elite: jglv0q9j5 +quicksrnetsmall_quantized_TFLITE-cs_8_gen_2: jpv61n8k5 +quicksrnetsmall_quantized_TFLITE-cs_8_gen_3: jgjv089vg +quicksrnetsmall_quantized_TFLITE-cs_8_elite: jpedrnqo5 +quicksrnetsmall_quantized_TFLITE-cs_6490: jgz3x06o5 +quicksrnetsmall_quantized_TFLITE-cs_8250: j5wedrk35 +quicksrnetsmall_quantized_TFLITE-cs_8550: jg9l3qrwg +quicksrnetsmall_quantized_TFLITE-cs_auto_lemans_8255: jp14dm98p +quicksrnetsmall_quantized_TFLITE-cs_auto_lemans_8775: jgdxrmkrp +quicksrnetsmall_quantized_TFLITE-cs_auto_lemans_8650: j5wedrkm5 +quicksrnetsmall_quantized_TFLITE-cs_auto_makena_8295: jg9l3qr8g +quicksrnetsmall_quantized_TFLITE-cs_xr_8450: jp14dm97p +quicksrnetsmall_quantized_QNN-cs_8_gen_2: jgdxrmkzp +quicksrnetsmall_quantized_QNN-cs_8_gen_3: j57yj8m95 +quicksrnetsmall_quantized_QNN-cs_8_elite: jp4lx2715 +quicksrnetsmall_quantized_QNN-cs_6490: jpxk7zql5 +quicksrnetsmall_quantized_QNN-cs_8550: j5mnwl79p +quicksrnetsmall_quantized_QNN-cs_x_elite: jgn69w4q5 +quicksrnetsmall_quantized_QNN-cs_auto_lemans_8255: jprv47r7g 
+quicksrnetsmall_quantized_QNN-cs_auto_lemans_8775: jp2k7z1qp +quicksrnetsmall_quantized_QNN-cs_auto_lemans_8650: jpy14yllp +quicksrnetsmall_quantized_QNN-cs_auto_makena_8295: jp0z1xwn5 +quicksrnetsmall_quantized_QNN-cs_xr_8450: jp8q3knop +quicksrnetsmall_quantized_ONNX-cs_8_gen_2: jgkelk1ng +quicksrnetsmall_quantized_ONNX-cs_8_gen_3: j5q67dnop +quicksrnetsmall_quantized_ONNX-cs_8_elite: jglv0qjm5 +quicksrnetsmall_quantized_ONNX-cs_x_elite: j56y30kyp +quicksrnetsmall_quantized_ONNX_DML_NPU-cs_x_elite: jp3j4ryng +quicksrnetmedium_quantized_TFLITE-cs_8_gen_2: jpedrnjv5 +quicksrnetmedium_quantized_TFLITE-cs_8_gen_3: jgz3x01x5 +quicksrnetmedium_quantized_TFLITE-cs_8_elite: j5wedrjm5 +quicksrnetmedium_quantized_TFLITE-cs_6490: jg9l3q68g +quicksrnetmedium_quantized_TFLITE-cs_8250: jp14dmr7p +quicksrnetmedium_quantized_TFLITE-cs_8550: jgdxrmjzp +quicksrnetmedium_quantized_TFLITE-cs_auto_lemans_8255: j57yj8q95 +quicksrnetmedium_quantized_TFLITE-cs_auto_lemans_8775: jp4lx2z15 +quicksrnetmedium_quantized_TFLITE-cs_auto_lemans_8650: jpxk7zwl5 +quicksrnetmedium_quantized_TFLITE-cs_auto_makena_8295: j5mnwlj9p +quicksrnetmedium_quantized_TFLITE-cs_xr_8450: jgn69wjq5 +quicksrnetmedium_quantized_QNN-cs_8_gen_2: jprv47z7g +quicksrnetmedium_quantized_QNN-cs_8_gen_3: jp2k7z2qp +quicksrnetmedium_quantized_QNN-cs_8_elite: jpy14y9lp +quicksrnetmedium_quantized_QNN-cs_6490: jp0z1xnn5 +quicksrnetmedium_quantized_QNN-cs_8550: jp8q3klop +quicksrnetmedium_quantized_QNN-cs_x_elite: jgkelkjng +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8255: j5q67djop +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8775: jglv0qnm5 +quicksrnetmedium_quantized_QNN-cs_auto_lemans_8650: j56y306yp +quicksrnetmedium_quantized_QNN-cs_auto_makena_8295: jp3j4rkng +quicksrnetmedium_quantized_QNN-cs_xr_8450: jgo219ykp +quicksrnetmedium_quantized_ONNX-cs_8_gen_2: jpv61n3r5 +quicksrnetmedium_quantized_ONNX-cs_8_gen_3: jgjv08xeg +quicksrnetmedium_quantized_ONNX-cs_8_elite: jpedrn9v5 
+quicksrnetmedium_quantized_ONNX-cs_x_elite: jgz3x0ex5 +quicksrnetmedium_quantized_ONNX_DML_NPU-cs_x_elite: j5wedrom5 +quicksrnetlarge_quantized_TFLITE-cs_8_gen_2: j57yj8z95 +quicksrnetlarge_quantized_TFLITE-cs_8_gen_3: jp4lx2q15 +quicksrnetlarge_quantized_TFLITE-cs_8_elite: jpxk7zvl5 +quicksrnetlarge_quantized_TFLITE-cs_6490: j5mnwlr9p +quicksrnetlarge_quantized_TFLITE-cs_8250: jgn69w2q5 +quicksrnetlarge_quantized_TFLITE-cs_8550: jprv47k7g +quicksrnetlarge_quantized_TFLITE-cs_auto_lemans_8255: jp2k7z8qp +quicksrnetlarge_quantized_TFLITE-cs_auto_lemans_8775: jpy14yelp +quicksrnetlarge_quantized_TFLITE-cs_auto_lemans_8650: jp0z1xyn5 +quicksrnetlarge_quantized_TFLITE-cs_auto_makena_8295: jp8q3koop +quicksrnetlarge_quantized_TFLITE-cs_xr_8450: jgkelkzng +quicksrnetlarge_quantized_QNN-cs_8_gen_2: j5q67d8op +quicksrnetlarge_quantized_QNN-cs_8_gen_3: jglv0qzm5 +quicksrnetlarge_quantized_QNN-cs_8_elite: j56y30jyp +quicksrnetlarge_quantized_QNN-cs_6490: jp3j4r3ng +quicksrnetlarge_quantized_QNN-cs_8550: jgo2190kp +quicksrnetlarge_quantized_QNN-cs_x_elite: jpv61nor5 +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8255: jgjv08meg +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8775: jpedrn1v5 +quicksrnetlarge_quantized_QNN-cs_auto_lemans_8650: jgz3x09x5 +quicksrnetlarge_quantized_QNN-cs_auto_makena_8295: j5wedrvm5 +quicksrnetlarge_quantized_QNN-cs_xr_8450: jg9l3q18g +quicksrnetlarge_quantized_ONNX-cs_8_gen_2: jp14dml7p +quicksrnetlarge_quantized_ONNX-cs_8_gen_3: jgdxrm9zp +quicksrnetlarge_quantized_ONNX-cs_8_elite: j57yj8w95 +quicksrnetlarge_quantized_ONNX-cs_x_elite: jp4lx2o15 +quicksrnetlarge_quantized_ONNX_DML_NPU-cs_x_elite: jpxk7zjl5 +mobilenet_v3_large_quantized_TFLITE-cs_8_gen_2: jp2k7z6qp +mobilenet_v3_large_quantized_TFLITE-cs_8_gen_3: jpy14ywlp +mobilenet_v3_large_quantized_TFLITE-cs_8_elite: jp0z1xqn5 +mobilenet_v3_large_quantized_TFLITE-cs_6490: jp8q3k9op +mobilenet_v3_large_quantized_TFLITE-cs_8250: jgkelknng +mobilenet_v3_large_quantized_TFLITE-cs_8550: j5q67dkop 
+mobilenet_v3_large_quantized_TFLITE-cs_auto_lemans_8255: jglv0qrm5 +mobilenet_v3_large_quantized_TFLITE-cs_auto_lemans_8775: j56y30lyp +mobilenet_v3_large_quantized_TFLITE-cs_auto_lemans_8650: jp3j4r2ng +mobilenet_v3_large_quantized_TFLITE-cs_auto_makena_8295: jgo219qkp +mobilenet_v3_large_quantized_TFLITE-cs_xr_8450: jpv61nxr5 +mobilenet_v3_large_quantized_QNN-cs_8_gen_2: jgjv084eg +mobilenet_v3_large_quantized_QNN-cs_8_gen_3: jpedrn3v5 +mobilenet_v3_large_quantized_QNN-cs_8_elite: jgz3x0kx5 +mobilenet_v3_large_quantized_QNN-cs_6490: j5wedrnm5 +mobilenet_v3_large_quantized_QNN-cs_8550: jg9l3qe8g +mobilenet_v3_large_quantized_QNN-cs_x_elite: jp14dmx7p +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8255: jgdxrmlzp +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8775: j5wedrn45 +mobilenet_v3_large_quantized_QNN-cs_auto_lemans_8650: jg9l3qemg +mobilenet_v3_large_quantized_QNN-cs_auto_makena_8295: jp14dmxnp +mobilenet_v3_large_quantized_QNN-cs_xr_8450: jgdxrml6p +mobilenet_v3_large_quantized_ONNX-cs_8_gen_2: j57yj83n5 +mobilenet_v3_large_quantized_ONNX-cs_8_gen_3: jp4lx2025 +mobilenet_v3_large_quantized_ONNX-cs_8_elite: jpxk7z285 +mobilenet_v3_large_quantized_ONNX-cs_x_elite: j5mnwly7p +mobilenet_v3_large_quantized_ONNX_DML_NPU-cs_x_elite: jgn69w8j5 +mobilenet_v2_quantized_TFLITE-cs_8_gen_2: jp0z18005 +mobilenet_v2_quantized_TFLITE-cs_8_gen_3: jp8q3dyqp +mobilenet_v2_quantized_TFLITE-cs_8_elite: jgkelwxvg +mobilenet_v2_quantized_TFLITE-cs_6490: j5q67xqep +mobilenet_v2_quantized_TFLITE-cs_8250: jglv09m25 +mobilenet_v2_quantized_TFLITE-cs_8550: j56y394np +mobilenet_v2_quantized_TFLITE-cs_auto_lemans_8255: jp3j4l0mg +mobilenet_v2_quantized_TFLITE-cs_auto_lemans_8775: jgo21761p +mobilenet_v2_quantized_TFLITE-cs_auto_lemans_8650: jpv61ykz5 +mobilenet_v2_quantized_TFLITE-cs_auto_makena_8295: jgjv06n1g +mobilenet_v2_quantized_TFLITE-cs_xr_8450: jpedr0m85 +mobilenet_v2_quantized_QNN-cs_8_gen_2: jgz3xqd45 +mobilenet_v2_quantized_QNN-cs_8_gen_3: j5wed0645 
+mobilenet_v2_quantized_QNN-cs_8_elite: jg9l37nmg +mobilenet_v2_quantized_QNN-cs_6490: jp14dkznp +mobilenet_v2_quantized_QNN-cs_8550: jgdxry16p +mobilenet_v2_quantized_QNN-cs_x_elite: j57yj1rn5 +mobilenet_v2_quantized_QNN-cs_auto_lemans_8255: jp4lx6r25 +mobilenet_v2_quantized_QNN-cs_auto_lemans_8775: jpxk78o85 +mobilenet_v2_quantized_QNN-cs_auto_lemans_8650: j5mnw1x7p +mobilenet_v2_quantized_QNN-cs_auto_makena_8295: jgn69dvj5 +mobilenet_v2_quantized_QNN-cs_xr_8450: jprv4m3kg +mobilenet_v2_quantized_ONNX-cs_8_gen_2: jp2k7qy6p +mobilenet_v2_quantized_ONNX-cs_8_gen_3: jpy14k30p +mobilenet_v2_quantized_ONNX-cs_8_elite: jp0z18z05 +mobilenet_v2_quantized_ONNX-cs_x_elite: jp8q3dqqp +mobilenet_v2_quantized_ONNX_DML_NPU-cs_x_elite: jgkelwevg +inception_v3_quantized_TFLITE-cs_8_gen_2: jp3j4ljmg +inception_v3_quantized_TFLITE-cs_8_gen_3: jgo21721p +inception_v3_quantized_TFLITE-cs_8_elite: jpv61y6z5 +inception_v3_quantized_TFLITE-cs_6490: jgjv06v1g +inception_v3_quantized_TFLITE-cs_8250: jpedr0d85 +inception_v3_quantized_TFLITE-cs_8550: jgz3xq345 +inception_v3_quantized_TFLITE-cs_auto_lemans_8255: j5wed0e45 +inception_v3_quantized_TFLITE-cs_auto_lemans_8775: jg9l37lmg +inception_v3_quantized_TFLITE-cs_auto_lemans_8650: jp14dk4np +inception_v3_quantized_TFLITE-cs_auto_makena_8295: jgdxryx6p +inception_v3_quantized_TFLITE-cs_xr_8450: j57yj1yn5 +inception_v3_quantized_QNN-cs_8_gen_2: jp4lx6l25 +inception_v3_quantized_QNN-cs_8_gen_3: jpxk78k85 +inception_v3_quantized_QNN-cs_8_elite: j5mnw1n7p +inception_v3_quantized_QNN-cs_6490: jgn69d6j5 +inception_v3_quantized_QNN-cs_8550: jprv4mvkg +inception_v3_quantized_QNN-cs_x_elite: jp2k7qk6p +inception_v3_quantized_QNN-cs_auto_lemans_8255: jpy14k10p +inception_v3_quantized_QNN-cs_auto_lemans_8775: jp0z18405 +inception_v3_quantized_QNN-cs_auto_lemans_8650: jp8q3d2qp +inception_v3_quantized_QNN-cs_auto_makena_8295: jgkelwvvg +inception_v3_quantized_QNN-cs_xr_8450: j5q67x0ep +inception_v3_quantized_ONNX-cs_8_gen_2: jglv09425 
+inception_v3_quantized_ONNX-cs_8_gen_3: j56y392np +inception_v3_quantized_ONNX-cs_8_elite: jp3j4lnmg +inception_v3_quantized_ONNX-cs_x_elite: jpv61yqz5 +inception_v3_quantized_ONNX_DML_NPU-cs_x_elite: jgjv06d1g +googlenet_quantized_TFLITE-cs_8_gen_2: jg9l370mg +googlenet_quantized_TFLITE-cs_8_gen_3: jp14dk2np +googlenet_quantized_TFLITE-cs_8_elite: jgdxryn6p +googlenet_quantized_TFLITE-cs_6490: j57yj12n5 +googlenet_quantized_TFLITE-cs_8250: jp4lx6n25 +googlenet_quantized_TFLITE-cs_8550: jpxk78985 +googlenet_quantized_TFLITE-cs_auto_lemans_8255: j5mnw1e7p +googlenet_quantized_TFLITE-cs_auto_lemans_8775: jgn69d0j5 +googlenet_quantized_TFLITE-cs_auto_lemans_8650: jprv4m6kg +googlenet_quantized_TFLITE-cs_auto_makena_8295: jp2k7qx6p +googlenet_quantized_TFLITE-cs_xr_8450: jpy14kz0p +googlenet_quantized_QNN-cs_8_gen_2: jp0z18305 +googlenet_quantized_QNN-cs_8_gen_3: jp8q3d0qp +googlenet_quantized_QNN-cs_8_elite: jgkelw7vg +googlenet_quantized_QNN-cs_6490: j5q67xeep +googlenet_quantized_QNN-cs_8550: jglv09625 +googlenet_quantized_QNN-cs_x_elite: j56y39enp +googlenet_quantized_QNN-cs_auto_lemans_8255: jp3j4lvmg +googlenet_quantized_QNN-cs_auto_lemans_8775: jgo217k1p +googlenet_quantized_QNN-cs_auto_lemans_8650: jpv61y0z5 +googlenet_quantized_QNN-cs_auto_makena_8295: jgjv06z1g +googlenet_quantized_QNN-cs_xr_8450: jpedr0e85 +googlenet_quantized_ONNX-cs_8_gen_2: jgz3xqo45 +googlenet_quantized_ONNX-cs_8_gen_3: j5wed0245 +googlenet_quantized_ONNX-cs_8_elite: jg9l37jmg +googlenet_quantized_ONNX-cs_x_elite: jp14dkynp +googlenet_quantized_ONNX_DML_NPU-cs_x_elite: jgdxrye6p +fcn_resnet50_quantized_TFLITE-cs_8_gen_2: jgdxryekp +fcn_resnet50_quantized_TFLITE-cs_8_gen_3: j57yj10q5 +fcn_resnet50_quantized_TFLITE-cs_8_elite: jp4lx6kq5 +fcn_resnet50_quantized_TFLITE-cs_6490: jpxk78nj5 +fcn_resnet50_quantized_TFLITE-cs_8250: j5mnw1qyp +fcn_resnet50_quantized_TFLITE-cs_8550: jgn69dmv5 +fcn_resnet50_quantized_TFLITE-cs_auto_lemans_8255: jprv4m2vg 
+fcn_resnet50_quantized_TFLITE-cs_auto_lemans_8775: jp2k7q9xp +fcn_resnet50_quantized_TFLITE-cs_auto_lemans_8650: jpy14kjrp +fcn_resnet50_quantized_TFLITE-cs_auto_makena_8295: jp0z18225 +fcn_resnet50_quantized_TFLITE-cs_xr_8450: jp8q3dmzp +fcn_resnet50_quantized_QNN-cs_8_gen_2: jgkelwqyg +fcn_resnet50_quantized_QNN-cs_8_gen_3: j5q67xr7p +fcn_resnet50_quantized_QNN-cs_8_elite: jglv092e5 +fcn_resnet50_quantized_QNN-cs_6490: j56y39zvp +fcn_resnet50_quantized_QNN-cs_8550: jp3j4l1xg +fcn_resnet50_quantized_QNN-cs_x_elite: jgo217n4p +fcn_resnet50_quantized_QNN-cs_auto_lemans_8255: jpv61yr75 +fcn_resnet50_quantized_QNN-cs_auto_lemans_8775: jgjv0627g +fcn_resnet50_quantized_QNN-cs_auto_lemans_8650: jpedr0w75 +fcn_resnet50_quantized_QNN-cs_auto_makena_8295: jgz3xqjz5 +fcn_resnet50_quantized_QNN-cs_xr_8450: j5wed03z5 +fcn_resnet50_quantized_ONNX-cs_8_gen_2: jg9l37yqg +fcn_resnet50_quantized_ONNX-cs_8_gen_3: jp14dkwkp +fcn_resnet50_quantized_ONNX-cs_8_elite: jgdxryqkp +fcn_resnet50_quantized_ONNX-cs_x_elite: j57yj1lq5 +fcn_resnet50_quantized_ONNX_DML_NPU-cs_x_elite: jp4lx6dq5 +densenet121_quantized_TFLITE-cs_8_gen_2: jprv4mevg +densenet121_quantized_TFLITE-cs_8_gen_3: jp2k7qlxp +densenet121_quantized_TFLITE-cs_8_elite: jpy14k6rp +densenet121_quantized_TFLITE-cs_6490: jp0z18l25 +densenet121_quantized_TFLITE-cs_8250: jp8q3dzzp +densenet121_quantized_TFLITE-cs_8550: jgkelw3yg +densenet121_quantized_TFLITE-cs_auto_lemans_8255: j5q67x37p +densenet121_quantized_TFLITE-cs_auto_lemans_8775: jglv093e5 +densenet121_quantized_TFLITE-cs_auto_lemans_8650: j56y39nvp +densenet121_quantized_TFLITE-cs_auto_makena_8295: jp3j4lexg +densenet121_quantized_TFLITE-cs_xr_8450: jgo21734p +densenet121_quantized_QNN-cs_8_gen_2: jpv61yv75 +densenet121_quantized_QNN-cs_8_gen_3: jgjv06e7g +densenet121_quantized_QNN-cs_8_elite: jpedr0k75 +densenet121_quantized_QNN-cs_6490: jgz3xqrz5 +densenet121_quantized_QNN-cs_8550: j5wed0qz5 +densenet121_quantized_QNN-cs_x_elite: jg9l37wqg 
+densenet121_quantized_QNN-cs_auto_lemans_8255: jp14dkekp +densenet121_quantized_QNN-cs_auto_lemans_8775: jgdxryokp +densenet121_quantized_QNN-cs_auto_lemans_8650: j57yj1xq5 +densenet121_quantized_QNN-cs_auto_makena_8295: jp4lx6vq5 +densenet121_quantized_QNN-cs_xr_8450: jpxk78yj5 +densenet121_quantized_ONNX-cs_8_gen_2: j5mnw13yp +densenet121_quantized_ONNX-cs_8_gen_3: jgn69dev5 +densenet121_quantized_ONNX-cs_8_elite: jprv4myvg +densenet121_quantized_ONNX-cs_x_elite: jp2k7qmxp +densenet121_quantized_ONNX_DML_NPU-cs_x_elite: jpy14kdrp +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_8_gen_2: jgkelwyyg +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_8_gen_3: j5q67x27p +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_8_elite: jglv09ke5 +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_6490: j56y391vp +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_8250: jp3j4lmxg +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_8550: jgo217v4p +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_auto_lemans_8255: jpv61yw75 +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_auto_lemans_8775: jgjv06l7g +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_auto_lemans_8650: jpedr0v75 +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_auto_makena_8295: jgz3xq7z5 +deeplabv3_plus_mobilenet_quantized_TFLITE-cs_xr_8450: j5wed09z5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_8_gen_2: jg9l374qg +deeplabv3_plus_mobilenet_quantized_QNN-cs_8_gen_3: jp14dk8kp +deeplabv3_plus_mobilenet_quantized_QNN-cs_8_elite: jgdxryvkp +deeplabv3_plus_mobilenet_quantized_QNN-cs_6490: j57yj1dq5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_8550: jp4lx6wq5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_x_elite: jpxk781j5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8255: j5mnw1zyp +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8775: jgn69d9v5 +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_lemans_8650: jprv4m4vg +deeplabv3_plus_mobilenet_quantized_QNN-cs_auto_makena_8295: jp2k7q7xp +deeplabv3_plus_mobilenet_quantized_QNN-cs_xr_8450: jpy14k4rp 
+deeplabv3_plus_mobilenet_quantized_ONNX-cs_8_gen_2: jp0z18125 +deeplabv3_plus_mobilenet_quantized_ONNX-cs_8_gen_3: jp8q3d3zp +deeplabv3_plus_mobilenet_quantized_ONNX-cs_8_elite: jgkelwlyg +deeplabv3_plus_mobilenet_quantized_ONNX-cs_x_elite: j5q67x77p +deeplabv3_plus_mobilenet_quantized_ONNX_DML_NPU-cs_x_elite: jglv090e5 +convnext_tiny_w8a8_quantized_TFLITE-cs_8_gen_2: jpv61y175 +convnext_tiny_w8a8_quantized_TFLITE-cs_8_gen_3: jgjv0607g +convnext_tiny_w8a8_quantized_TFLITE-cs_8_elite: jpedr0r75 +convnext_tiny_w8a8_quantized_TFLITE-cs_6490: jgz3xqxz5 +convnext_tiny_w8a8_quantized_TFLITE-cs_8250: j5wed0dz5 +convnext_tiny_w8a8_quantized_TFLITE-cs_8550: jg9l373qg +convnext_tiny_w8a8_quantized_TFLITE-cs_auto_lemans_8255: jp14dkdkp +convnext_tiny_w8a8_quantized_TFLITE-cs_auto_lemans_8775: jgdxryrkp +convnext_tiny_w8a8_quantized_TFLITE-cs_auto_lemans_8650: j5wed0dj5 +convnext_tiny_w8a8_quantized_TFLITE-cs_auto_makena_8295: jg9l373vg +convnext_tiny_w8a8_quantized_TFLITE-cs_xr_8450: jp14dkdlp +convnext_tiny_w8a8_quantized_QNN-cs_8_gen_2: jgdxryrlp +convnext_tiny_w8a8_quantized_QNN-cs_8_gen_3: j57yj1vr5 +convnext_tiny_w8a8_quantized_QNN-cs_8_elite: jp4lx6jl5 +convnext_tiny_w8a8_quantized_QNN-cs_6490: jpxk78e95 +convnext_tiny_w8a8_quantized_QNN-cs_8550: j5mnw1vqp +convnext_tiny_w8a8_quantized_QNN-cs_x_elite: jgn69drm5 +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8255: jprv4m1eg +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8775: jp2k7q3mp +convnext_tiny_w8a8_quantized_QNN-cs_auto_lemans_8650: jpy14kv4p +convnext_tiny_w8a8_quantized_QNN-cs_auto_makena_8295: jp0z18ee5 +convnext_tiny_w8a8_quantized_QNN-cs_xr_8450: jp8q3dw8p +convnext_tiny_w8a8_quantized_ONNX-cs_8_gen_2: jgkelwrog +convnext_tiny_w8a8_quantized_ONNX-cs_8_gen_3: j5q67x9mp +convnext_tiny_w8a8_quantized_ONNX-cs_8_elite: jglv09el5 +convnext_tiny_w8a8_quantized_ONNX-cs_x_elite: j56y39q7p +convnext_tiny_w8a8_quantized_ONNX_DML_NPU-cs_x_elite: jp3j4lqzg diff --git 
a/qai_hub_models/scorecard/intermediates/quantize-jobs.yaml b/qai_hub_models/scorecard/intermediates/quantize-jobs.yaml new file mode 100644 index 00000000..b898a6ee --- /dev/null +++ b/qai_hub_models/scorecard/intermediates/quantize-jobs.yaml @@ -0,0 +1,23 @@ +xlsr_quantized: jp4lx8m85 +wideresnet50_quantized: jgn69xok5 +vit_quantized: jp2k7j4rp +squeezenet1_1_quantized: jp0z1kd95 +shufflenet_v2_quantized: jgkeldowg +sesr_m5_quantized: jglv07oj5 +resnext50_quantized: jp3j48x3g +resnext101_quantized: jgjv01wvg +resnet50_quantized: jgz3xw4o5 +resnet18_quantized: jg9l38xwg +resnet101_quantized: jgdxr0zrp +regnet_quantized: jgdxr0zzp +quicksrnetsmall_quantized: jpxk7mdl5 +quicksrnetmedium_quantized: jgn69x7q5 +quicksrnetlarge_quantized: jp2k7jvqp +mobilenet_v3_large_quantized: jp0z1kvn5 +mobilenet_v2_quantized: jgkeld9ng +inception_v3_quantized: jglv07lm5 +googlenet_quantized: jpv6147r5 +fcn_resnet50_quantized: jgz3xwnx5 +densenet121_quantized: jg9l38d8g +deeplabv3_plus_mobilenet_quantized: j57yj6995 +convnext_tiny_w8a8_quantized: j5mnw489p diff --git a/qai_hub_models/scorecard/path_compile.py b/qai_hub_models/scorecard/path_compile.py index f77a6f45..b56dd0dc 100644 --- a/qai_hub_models/scorecard/path_compile.py +++ b/qai_hub_models/scorecard/path_compile.py @@ -4,6 +4,7 @@ # --------------------------------------------------------------------- import os from enum import Enum +from typing import Optional from qai_hub_models.models.common import TargetRuntime @@ -32,8 +33,23 @@ def enabled(self) -> bool: ) @staticmethod - def all_enabled() -> list["ScorecardCompilePath"]: - return [x for x in ScorecardCompilePath if x.enabled] + def all_compile_paths( + enabled: Optional[bool] = None, + supports_quantization: Optional[bool] = None, + ) -> list["ScorecardCompilePath"]: + """ + Get all compile paths that match the given attributes. + If an attribute is None, it is ignored when filtering paths. 
+ """ + return [ + path + for path in ScorecardCompilePath + if (enabled is None or path.enabled == enabled) + and ( + supports_quantization is None + or path.supports_quantization == supports_quantization + ) + ] @property def runtime(self) -> TargetRuntime: @@ -45,6 +61,13 @@ def runtime(self) -> TargetRuntime: return TargetRuntime.QNN raise NotImplementedError() + @property + def supports_quantization(self) -> bool: + if self == ScorecardCompilePath.ONNX_FP16: + # Only FP32 models are applicable for this compilation path. + return False + return True + def get_compile_options(self, model_is_quantized: bool = False) -> str: if self == ScorecardCompilePath.ONNX_FP16 and not model_is_quantized: return "--quantize_full_type float16 --quantize_io" diff --git a/qai_hub_models/scorecard/path_profile.py b/qai_hub_models/scorecard/path_profile.py index 2625f689..899c7f0e 100644 --- a/qai_hub_models/scorecard/path_profile.py +++ b/qai_hub_models/scorecard/path_profile.py @@ -4,6 +4,7 @@ # --------------------------------------------------------------------- import os from enum import Enum +from typing import Optional from qai_hub_models.models.common import TargetRuntime from qai_hub_models.scorecard.path_compile import ScorecardCompilePath @@ -14,6 +15,7 @@ class ScorecardProfilePath(Enum): QNN = 1 ONNX = 2 ONNX_DML_GPU = 3 + ONNX_DML_NPU = 4 def __str__(self): return self.name.lower() @@ -33,8 +35,23 @@ def enabled(self) -> bool: ) @staticmethod - def all_enabled() -> list["ScorecardProfilePath"]: - return [x for x in ScorecardProfilePath if x.enabled] + def all_profile_paths( + enabled: Optional[bool] = None, + supports_quantization: Optional[bool] = None, + ) -> list["ScorecardProfilePath"]: + """ + Get all profile paths that match the given attributes. + If an attribute is None, it is ignored when filtering paths. 
+ """ + return [ + path + for path in ScorecardProfilePath + if (enabled is None or path.enabled == enabled) + and ( + supports_quantization is None + or path.compile_path.supports_quantization == supports_quantization + ) + ] @property def include_in_perf_yaml(self) -> bool: @@ -48,7 +65,11 @@ def include_in_perf_yaml(self) -> bool: def runtime(self) -> TargetRuntime: if self == ScorecardProfilePath.TFLITE: return TargetRuntime.TFLITE - if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_GPU]: + if self in [ + ScorecardProfilePath.ONNX, + ScorecardProfilePath.ONNX_DML_GPU, + ScorecardProfilePath.ONNX_DML_NPU, + ]: return TargetRuntime.ONNX if self == ScorecardProfilePath.QNN: return TargetRuntime.QNN @@ -58,7 +79,7 @@ def runtime(self) -> TargetRuntime: def compile_path(self) -> ScorecardCompilePath: if self == ScorecardProfilePath.TFLITE: return ScorecardCompilePath.TFLITE - if self == ScorecardProfilePath.ONNX: + if self in [ScorecardProfilePath.ONNX, ScorecardProfilePath.ONNX_DML_NPU]: return ScorecardCompilePath.ONNX if self == ScorecardProfilePath.ONNX_DML_GPU: return ScorecardCompilePath.ONNX_FP16 @@ -69,5 +90,7 @@ def compile_path(self) -> ScorecardCompilePath: @property def profile_options(self) -> str: if self == ScorecardProfilePath.ONNX_DML_GPU: - return "--compute_unit gpu" + return "--onnx_execution_providers directml" + elif self == ScorecardProfilePath.ONNX_DML_NPU: + return "--onnx_execution_providers directml-npu" return "" diff --git a/qai_hub_models/scorecard/results/chipset_helpers.py b/qai_hub_models/scorecard/results/chipset_helpers.py index 14b71311..0c6303ef 100644 --- a/qai_hub_models/scorecard/results/chipset_helpers.py +++ b/qai_hub_models/scorecard/results/chipset_helpers.py @@ -115,6 +115,8 @@ def get_supported_devices(chips) -> list[str]: device.name for device in hub.get_devices(attributes=f"chipset:{chip}") if "(Family)" not in device.name + and "Snapdragon 8 Gen 3 QRD" + != device.name # this is not available to 
all users ] supported_devices_for_chip = sorted(set(supported_devices_for_chip)) __CHIP_SUPPORTED_DEVICES_CACHE[chip] = supported_devices_for_chip diff --git a/qai_hub_models/scorecard/results/performance_summary.py b/qai_hub_models/scorecard/results/performance_summary.py index 812b5e82..b7c2438b 100644 --- a/qai_hub_models/scorecard/results/performance_summary.py +++ b/qai_hub_models/scorecard/results/performance_summary.py @@ -137,10 +137,15 @@ def get_perf_card( include_failed_jobs: bool = True, include_internal_devices: bool = True, exclude_paths: Iterable[ScorecardProfilePath] = [], + exclude_form_factors: Iterable[ScorecardDevice.FormFactor] = [], ) -> list[dict[str, Union[str, dict[str, str]]]]: perf_card = [] for summary in self.runs_per_device.values(): - if include_internal_devices or summary.device.public: + if ( + include_internal_devices + or summary.device.public + and summary.device.form_factor not in exclude_form_factors + ): device_summary = summary.get_perf_card( include_failed_jobs, exclude_paths ) @@ -222,7 +227,11 @@ def from_runs(model_runs: list[ProfileScorecardJob]): } ) - def get_chipsets(self, include_internal_devices: bool = False) -> set[str]: + def get_chipsets( + self, + include_internal_devices: bool = False, + exclude_form_factors: Iterable[ScorecardDevice.FormFactor] = [], + ) -> set[str]: chips: set[str] = set() for model_id, model_summary in self.runs_per_model.items(): for device, device_summary in model_summary.runs_per_device.items(): @@ -240,7 +249,11 @@ def get_chipsets(self, include_internal_devices: bool = False) -> set[str]: continue # Don't include private devices - if not include_internal_devices and not device.public: + if ( + not include_internal_devices + and not device.public + and device.form_factor not in exclude_form_factors + ): continue chips.add(device.chipset) @@ -251,10 +264,11 @@ def get_perf_card( include_failed_jobs: bool = True, include_internal_devices: bool = True, exclude_paths: 
Iterable[ScorecardProfilePath] = [], + exclude_form_factors: Iterable[ScorecardDevice.FormFactor] = [], ) -> dict[str, str | list[Any] | dict[str, Any]]: perf_card: dict[str, str | list[Any] | dict[str, Any]] = {} - chips = self.get_chipsets(include_internal_devices) + chips = self.get_chipsets(include_internal_devices, exclude_form_factors) perf_card["aggregated"] = dict( supported_devices=get_supported_devices(chips), supported_chipsets=supported_chipsets_santized(chips), @@ -266,7 +280,10 @@ def get_perf_card( { "name": model_id, "performance_metrics": summary.get_perf_card( - include_failed_jobs, include_internal_devices, exclude_paths + include_failed_jobs, + include_internal_devices, + exclude_paths, + exclude_form_factors, ), } ) diff --git a/qai_hub_models/scorecard/results/scorecard_job.py b/qai_hub_models/scorecard/results/scorecard_job.py index 19b1d6cd..d46ca090 100644 --- a/qai_hub_models/scorecard/results/scorecard_job.py +++ b/qai_hub_models/scorecard/results/scorecard_job.py @@ -148,14 +148,14 @@ def from_model_id( components.append(None) # type: ignore path: ScorecardCompilePath - for path in ScorecardCompilePath.all_enabled(): + for path in ScorecardCompilePath.all_compile_paths(enabled=True): for component in components: model_requires_fp16 = not ( model_code_gen.is_aimet or model_code_gen.use_hub_quantization ) for device in ScorecardDevice.all_devices( enabled=True, - supports_fp16=model_requires_fp16 or None, + supports_fp16_npu=model_requires_fp16 or None, supports_compile_path=path, ): model_runs.append( @@ -224,14 +224,14 @@ def from_model_id( components.append(None) # type: ignore path: ScorecardProfilePath - for path in ScorecardProfilePath.all_enabled(): + for path in ScorecardProfilePath.all_profile_paths(enabled=True): for component in components: model_requires_fp16 = not ( model_code_gen.is_aimet or model_code_gen.use_hub_quantization ) for device in ScorecardDevice.all_devices( enabled=True, - supports_fp16=model_requires_fp16 or 
None, + supports_fp16_npu=model_requires_fp16 or None, supports_profile_path=path, ): model_runs.append( diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 65f05ebf..142ad563 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -29,7 +29,7 @@ from qai_hub_models.utils.inference import OnDeviceModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S23 (Family)" +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24 (Family)" class ParseEnumAction(argparse.Action): @@ -38,6 +38,7 @@ def __init__(self, option_strings, dest, enum_type, **kwargs): self.enum_type = enum_type def __call__(self, parser, namespace, values, option_string=None): + assert isinstance(values, str) setattr(namespace, self.dest, self.enum_type[values.upper().replace("-", "_")]) @@ -441,7 +442,6 @@ def _evaluate_export_common_parser( supports_tflite: bool = True, supports_qnn: bool = True, supports_onnx: bool = True, - supports_precompiled_qnn_onnx: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, is_hub_quantized: bool = False, @@ -468,7 +468,7 @@ def _evaluate_export_common_parser( available_runtimes.append(TargetRuntime.QNN) if supports_onnx: available_runtimes.append(TargetRuntime.ONNX) - if supports_precompiled_qnn_onnx: + if supports_qnn: available_runtimes.append(TargetRuntime.PRECOMPILED_QNN_ONNX) default_runtime = _get_default_runtime(available_runtimes) @@ -510,7 +510,6 @@ def export_parser( supports_tflite: bool = True, supports_qnn: bool = True, supports_onnx: bool = True, - supports_precompiled_qnn_onnx: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, default_export_device: str = DEFAULT_EXPORT_DEVICE, @@ -531,9 +530,6 @@ def export_parser( supports_onnx: Whether ORT export is supported. Default=True. 
- supports_precompiled_qnn_onnx: - Whether precompiled ORT (with QNN context binary) export is supported. - Default=True. default_runtime: Which runtime to use as default if not specified in cli args. exporting_compiled_model: True when exporting compiled model. @@ -550,7 +546,6 @@ def export_parser( supports_tflite=supports_tflite, supports_qnn=supports_qnn, supports_onnx=supports_onnx, - supports_precompiled_qnn_onnx=supports_precompiled_qnn_onnx, default_runtime=default_runtime, exporting_compiled_model=exporting_compiled_model, is_hub_quantized=is_hub_quantized, diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index ad6f4825..c0298a09 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -386,6 +386,7 @@ def __init__( repo_url: str, models_website_url: str, models_website_relative_path: str, + genie_url: str, ) -> None: self.local_store_path = local_store_path self.asset_url = asset_url @@ -401,6 +402,7 @@ def __init__( self.repo_url = repo_url self.models_website_url = models_website_url self.models_website_relative_path = models_website_relative_path + self.genie_url = genie_url def get_hugging_face_url(self, model_name: str) -> str: return f"https://huggingface.co/{self.get_huggingface_path(model_name)}" @@ -525,6 +527,7 @@ def from_cfg( asset_cfg["repo_url"], asset_cfg["models_website_url"], asset_cfg["models_website_relative_path"], + asset_cfg["genie_url"], ) ASSET_CFG_SCHEMA = Schema( diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index 56319e6b..59e2cfde 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -13,13 +13,12 @@ from typing import Any, Optional, TypeVar, Union, get_args, get_type_hints import requests -from datasets import get_dataset_infos from qai_hub.util.session import create_session from schema import And from schema import Optional as OptionalSchema from schema 
import Schema, SchemaError -from qai_hub_models.scorecard import ScorecardProfilePath +from qai_hub_models.scorecard import ScorecardDevice, ScorecardProfilePath from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, QAIHM_WEB_ASSET, load_yaml from qai_hub_models.utils.path_helpers import ( MODELS_PACKAGE_NAME, @@ -183,23 +182,6 @@ def from_dict( BaseDataClassTypeVar = TypeVar("BaseDataClassTypeVar", bound="BaseDataClass") -@unique -class FORM_FACTOR(Enum): - PHONE = 0 - TABLET = 1 - IOT = 2 - XR = 3 - - @staticmethod - def from_string(string: str) -> FORM_FACTOR: - return FORM_FACTOR[string.upper()] - - def __str__(self): - if self == FORM_FACTOR.IOT: - return "IoT" - return self.name.title() - - @unique class MODEL_DOMAIN(Enum): COMPUTER_VISION = 0 @@ -512,6 +494,9 @@ class QAIHMModelCodeGen(BaseDataClass): # ideally with a reference to an internal issue. onnx_export_failure_reason: str = "" + # If the default device needs to be overwritten for a model. + default_device: str = "" + # Sets the `check_trace` argument on `torch.jit.trace`. check_trace: bool = True @@ -634,6 +619,9 @@ class QAIHMModelInfo(BaseDataClass): # This should be set to public unless the model has poor accuracy/perf. status: MODEL_STATUS + # Device form factors for which we don't publish performance data. + private_perf_form_factors: list[ScorecardDevice.FormFactor] + # A brief catchy headline explaining what the model does and why it may be interesting headline: str @@ -660,7 +648,7 @@ class QAIHMModelInfo(BaseDataClass): # A list of device types for which this model could be useful. # If unsure what to put here, default to `Phone` and `Tablet`. - form_factors: list[FORM_FACTOR] + form_factors: list[ScorecardDevice.FormFactor] # Whether the model has a static image uploaded in S3. All public models must have this. 
has_static_banner: bool @@ -813,6 +801,9 @@ def validate(self) -> tuple[bool, Optional[str]]: ): return False, "Public models must support at least one export path" + if not self.has_static_banner: + return False, "Public models must have a static asset." + session = create_session() if self.has_static_banner: static_banner_url = ASSET_CONFIG.get_web_asset_url( @@ -927,16 +918,6 @@ def get_hugging_face_metadata(self, root: Path = QAIHM_PACKAGE_ROOT): hf_metadata["library_name"] = "pytorch" hf_metadata["license"] = self.license_type hf_metadata["tags"] = [tag.name.lower() for tag in self.tags] + ["android"] - if self.dataset != []: - for dataset_id in self.dataset: - try: - get_dataset_infos(dataset_id) - except Exception: - raise ValueError( - f"This dataset {dataset_id} is not a valid HuggingFace Dataset." - ) - hf_metadata["datasets"] = self.dataset - hf_metadata["pipeline_tag"] = self.get_hf_pipeline_tag() return hf_metadata @@ -986,11 +967,16 @@ def from_yaml( # Load CFG and params data = load_yaml(info_path) data["status"] = MODEL_STATUS.from_string(data["status"]) + data["private_perf_form_factors"] = [ + ScorecardDevice.FormFactor.from_string(tag) + for tag in data.get("private_perf_form_factors", []) + ] + data["domain"] = MODEL_DOMAIN.from_string(data["domain"]) data["use_case"] = MODEL_USE_CASE.from_string(data["use_case"]) data["tags"] = [MODEL_TAG.from_string(tag) for tag in data["tags"]] data["form_factors"] = [ - FORM_FACTOR.from_string(tag) for tag in data["form_factors"] + ScorecardDevice.FormFactor.from_string(tag) for tag in data["form_factors"] ] data["code_gen_config"] = QAIHMModelCodeGen.from_yaml(code_gen_path) diff --git a/qai_hub_models/utils/evaluate.py b/qai_hub_models/utils/evaluate.py index d4277a2b..b0ddb06f 100644 --- a/qai_hub_models/utils/evaluate.py +++ b/qai_hub_models/utils/evaluate.py @@ -19,8 +19,9 @@ from qai_hub.public_rest_api import DatasetEntries from qai_hub.util.dataset_entries_converters import dataset_entries_to_h5 from 
torch.utils.data import DataLoader, Dataset, random_split +from tqdm import tqdm -from qai_hub_models.datasets import BaseDataset, get_dataset_from_name +from qai_hub_models.datasets import BaseDataset, DatasetSplit, get_dataset_from_name from qai_hub_models.models.protocols import EvalModelProtocol from qai_hub_models.utils.asset_loaders import ( get_hub_datasets_path, @@ -116,18 +117,20 @@ def _populate_data_cache_impl( dataloader = DataLoader(dataset, batch_size=split_size, shuffle=True) for sample in dataloader: model_inputs, ground_truth_values, *_ = sample - if isinstance(ground_truth_values, tuple): + if isinstance(ground_truth_values, list) or isinstance( + ground_truth_values, tuple + ): output_names = [f"output_{i}" for i in range(len(ground_truth_values))] + ground_truth_values = tuple(ground_truth_values) else: output_names = ["output_0"] + ground_truth_values = (ground_truth_values,) input_entries = make_hub_dataset_entries( (model_inputs.split(1, dim=0),), input_names, channel_last_input, ) - gt_entries = make_hub_dataset_entries( - (ground_truth_values,), output_names, None - ) + gt_entries = make_hub_dataset_entries(ground_truth_values, output_names, None) # print(input_entries) input_dataset = hub.upload_dataset(input_entries) gt_dataset = hub.upload_dataset(gt_entries) @@ -337,7 +340,7 @@ def evaluate_on_dataset( assert isinstance(torch_model, EvalModelProtocol), "Model must have an evaluator." 
_validate_inputs(num_samples) - source_torch_dataset = get_dataset_from_name(dataset_name) + source_torch_dataset = get_dataset_from_name(dataset_name, DatasetSplit.VAL) input_names = list(torch_model.get_input_spec().keys()) on_device_model = AsyncOnDeviceModel( compiled_model, input_names, hub_device, profile_options @@ -373,9 +376,13 @@ def evaluate_on_dataset( else: on_device_results.append(on_device_model(model_inputs.split(1, dim=0))) - for model_input, ground_truth in zip(model_inputs, ground_truth_values): + for j, model_input in tqdm(enumerate(model_inputs)): + if isinstance(ground_truth_values, torch.Tensor): + ground_truth = ground_truth_values[j : j + 1] + else: + ground_truth = tuple(val[j : j + 1] for val in ground_truth_values) torch_output = torch_model(model_input.unsqueeze(0)) - torch_evaluator.add_batch(torch_output, ground_truth.unsqueeze(0)) + torch_evaluator.add_batch(torch_output, ground_truth) print( f"Cumulative torch accuracy on batch {i + 1}/{num_batches}: " f"{torch_evaluator.formatted_accuracy()}" diff --git a/qai_hub_models/utils/quantization.py b/qai_hub_models/utils/quantization.py index ce339f9a..f0cc2443 100644 --- a/qai_hub_models/utils/quantization.py +++ b/qai_hub_models/utils/quantization.py @@ -8,9 +8,9 @@ import torch from qai_hub.client import DatasetEntries, Device, QuantizeDtype -from torch.utils.data import DataLoader +from torch.utils.data import DataLoader, TensorDataset -from qai_hub_models.datasets import get_dataset_from_name +from qai_hub_models.datasets import DatasetSplit, get_dataset_from_name from qai_hub_models.models.common import TargetRuntime from qai_hub_models.models.protocols import HubModelProtocol from qai_hub_models.utils.asset_loaders import CachedWebDatasetAsset, load_torch @@ -31,7 +31,7 @@ def make_image_sample_data_loader() -> DataLoader: img_tensor = get_image_quantization_samples() - tensor_dataset = torch.utils.data.TensorDataset(img_tensor) + tensor_dataset = TensorDataset(img_tensor) return 
DataLoader(tensor_dataset, batch_size=32) @@ -90,7 +90,9 @@ def get_calibration_data( Returns: Dataset compatible with the format expected by AI Hub. """ - torch_dataset = sample_dataset(get_dataset_from_name(dataset_name), num_samples) + torch_dataset = sample_dataset( + get_dataset_from_name(dataset_name, split=DatasetSplit.TRAIN), num_samples + ) torch_samples = tuple( [torch_dataset[i][j].unsqueeze(0).numpy() for i in range(len(torch_dataset))] for j in range(len(input_spec)) diff --git a/scripts/examples/quantize_segmenter_voc.py b/scripts/examples/quantize_segmenter_voc.py index 2efa487a..93dc20c8 100644 --- a/scripts/examples/quantize_segmenter_voc.py +++ b/scripts/examples/quantize_segmenter_voc.py @@ -63,8 +63,10 @@ model = MODELS[args.model].from_pretrained(aimet_encodings=None) - image_size = model.get_input_spec()["image"][0][-2:] - dataset = VOCSegmentationDataset(image_size=image_size) + image_size = model.get_input_spec()["image"][0] + dataset = VOCSegmentationDataset( + input_height=image_size[-2], input_width=image_size[-1] + ) dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) evaluator = model.get_evaluator() diff --git a/scripts/tasks/task.py b/scripts/tasks/task.py index 6c17af08..48149aee 100644 --- a/scripts/tasks/task.py +++ b/scripts/tasks/task.py @@ -202,6 +202,7 @@ def __init__( # Pytest returns code 5 if no tests were run. 
Set this to true # to ignore that return code (count it as "passed") ignore_no_tests_return_code: bool = False, + include_pytest_cmd_in_status_message: bool = True, ) -> None: pytest_options = "" @@ -228,6 +229,7 @@ def __init__( default_options = "-rxXs -p no:warnings --durations-min=0.5 --durations=20" command = f"pytest {default_options} {pytest_options} " + self.include_pytest_cmd_in_status_message = include_pytest_cmd_in_status_message super().__init__( group_name, venv, @@ -237,6 +239,11 @@ def __init__( ignore_return_codes=[5] if ignore_no_tests_return_code else [], ) + def get_status_message(self) -> str: + if not self.include_pytest_cmd_in_status_message and self.last_result is False: + return f"{self.group_name} failed." + return super().get_status_message() + class CompositeTask(Task): """ @@ -276,6 +283,12 @@ def get_status_message(self) -> str: if self.last_result is not None: if self.last_result: return f"{self.group_name} succeeded." + elif self.group_name is None: + all_res = [] + for task in self.tasks: + if not task.last_result: + all_res.append(task.get_status_message()) + return "\n".join(all_res) else: res = f"{self.group_name} failed." if self.show_subtasks_in_failure_message: diff --git a/scripts/tasks/test.py b/scripts/tasks/test.py index d444c837..ad7110d2 100644 --- a/scripts/tasks/test.py +++ b/scripts/tasks/test.py @@ -172,6 +172,7 @@ def __init__( env=env, raise_on_failure=not needs_model_venv, # Do not raise on failure if a model venv was created, to make sure the venv is removed when the test finishes ignore_no_tests_return_code=True, + include_pytest_cmd_in_status_message=False, ) )