From 9857d3ffe0df7946040aec35628e1f6582b03095 Mon Sep 17 00:00:00 2001 From: Qualcomm AI Stack Models Bot Date: Thu, 12 Dec 2024 12:55:21 -0800 Subject: [PATCH] v0.20.0 See https://github.com/quic/ai-hub-models/releases/v0.20.0 for changelog. Signed-off-by: QAIHM Team --- README.md | 11 +- qai_hub_models/_version.py | 2 +- qai_hub_models/labels/ppe_labels.txt | 2 + .../depth_estimation}/app.py | 15 +- .../models/_shared/depth_estimation/demo.py | 49 + .../{ => _shared}/face_attrib_net/app.py | 0 .../models/_shared/face_attrib_net/demo.py | 57 + .../models/_shared/face_attrib_net/model.py | 644 ++++ .../models/_shared/face_detection/app.py | 32 + .../models/_shared/face_detection/demo.py | 67 + .../models/_shared/face_detection/model.py | 259 ++ qai_hub_models/models/_shared/yolo/app.py | 198 +- qai_hub_models/models/_shared/yolo/demo.py | 57 +- qai_hub_models/models/_shared/yolo/model.py | 42 + qai_hub_models/models/aotgan/export.py | 2 +- qai_hub_models/models/aotgan/perf.yaml | 298 +- qai_hub_models/models/beit/perf.yaml | 272 +- .../conditional_detr_resnet50/perf.yaml | 266 +- .../models/controlnet_quantized/export.py | 6 +- qai_hub_models/models/convnext_base/perf.yaml | 148 +- qai_hub_models/models/convnext_tiny/perf.yaml | 258 +- .../convnext_tiny_w8a16_quantized/perf.yaml | 140 +- .../convnext_tiny_w8a8_quantized/evaluate.py | 1 - .../convnext_tiny_w8a8_quantized/export.py | 5 +- .../convnext_tiny_w8a8_quantized/perf.yaml | 201 +- qai_hub_models/models/ddrnet23_slim/export.py | 2 +- qai_hub_models/models/ddrnet23_slim/perf.yaml | 323 +- .../models/deeplabv3_plus_mobilenet/perf.yaml | 270 +- .../evaluate.py | 1 + .../export.py | 2 +- .../perf.yaml | 250 +- .../models/deeplabv3_resnet50/perf.yaml | 131 +- qai_hub_models/models/densenet121/perf.yaml | 249 +- .../models/densenet121_quantized/info.yaml | 1 + .../models/densenet121_quantized/perf.yaml | 188 +- .../README.md | 22 +- .../models/depth_anything/__init__.py | 10 + .../models/depth_anything/conftest.py 
| 39 + qai_hub_models/models/depth_anything/demo.py | 24 + .../models/depth_anything/export.py | 215 ++ .../models/depth_anything/info.yaml | 35 + qai_hub_models/models/depth_anything/model.py | 66 + .../models/depth_anything/perf.yaml | 512 +++ .../models/depth_anything/requirements.txt | 1 + qai_hub_models/models/depth_anything/test.py | 55 + .../models/depth_anything_v2/README.md | 63 + .../models/depth_anything_v2/__init__.py | 10 + .../models/depth_anything_v2/conftest.py | 39 + .../models/depth_anything_v2/demo.py | 24 + .../models/depth_anything_v2/export.py | 215 ++ .../models/depth_anything_v2/info.yaml | 35 + .../models/depth_anything_v2/model.py | 66 + .../models/depth_anything_v2/perf.yaml | 512 +++ .../models/depth_anything_v2/requirements.txt | 1 + .../models/depth_anything_v2/test.py | 55 + .../models/detr_resnet101/export.py | 2 +- .../models/detr_resnet101/perf.yaml | 331 +- .../models/detr_resnet101_dc5/export.py | 2 +- .../models/detr_resnet101_dc5/perf.yaml | 331 +- qai_hub_models/models/detr_resnet50/export.py | 2 +- qai_hub_models/models/detr_resnet50/perf.yaml | 336 +- .../models/detr_resnet50_dc5/export.py | 2 +- .../models/detr_resnet50_dc5/perf.yaml | 333 +- .../models/efficientnet_b0/perf.yaml | 248 +- .../models/efficientnet_b4/perf.yaml | 146 +- .../models/efficientnet_v2_s/perf.yaml | 146 +- .../models/efficientvit_b2_cls/perf.yaml | 152 +- .../models/efficientvit_l2_cls/perf.yaml | 160 +- .../models/efficientvit_l2_seg/perf.yaml | 108 +- qai_hub_models/models/esrgan/perf.yaml | 272 +- .../models/face_attrib_net/README.md | 6 +- .../models/face_attrib_net/__init__.py | 1 - qai_hub_models/models/face_attrib_net/demo.py | 57 +- .../models/face_attrib_net/export.py | 2 +- .../models/face_attrib_net/info.yaml | 3 +- .../models/face_attrib_net/model.py | 639 +--- .../models/face_attrib_net/perf.yaml | 271 +- qai_hub_models/models/face_attrib_net/test.py | 10 +- .../face_attrib_net_quantized/README.md | 58 + 
.../face_attrib_net_quantized/__init__.py | 11 + .../face_attrib_net_quantized/conftest.py | 37 + .../demo.py | 10 +- .../face_attrib_net_quantized/export.py | 216 ++ .../face_attrib_net_quantized/info.yaml | 32 + .../models/face_attrib_net_quantized/model.py | 92 + .../face_attrib_net_quantized/perf.yaml | 579 ++++ .../models/face_attrib_net_quantized/test.py | 58 + qai_hub_models/models/face_det_lite/app.py | 4 +- qai_hub_models/models/face_det_lite/demo.py | 57 +- qai_hub_models/models/face_det_lite/model.py | 252 +- qai_hub_models/models/face_det_lite/perf.yaml | 246 +- qai_hub_models/models/face_det_lite/test.py | 4 +- .../models/face_det_lite_quantized/README.md | 58 + .../face_det_lite_quantized/__init__.py | 8 + .../face_det_lite_quantized/conftest.py | 39 + .../models/face_det_lite_quantized/demo.py | 17 + .../models/face_det_lite_quantized/export.py | 216 ++ .../models/face_det_lite_quantized/info.yaml | 32 + .../models/face_det_lite_quantized/model.py | 78 + .../models/face_det_lite_quantized/perf.yaml | 579 ++++ .../models/face_det_lite_quantized/test.py | 60 + qai_hub_models/models/facemap_3dmm/perf.yaml | 254 +- .../models/facemap_3dmm_quantized/export.py | 4 +- .../models/facemap_3dmm_quantized/model.py | 4 +- .../models/facemap_3dmm_quantized/perf.yaml | 419 ++- qai_hub_models/models/fastsam_s/export.py | 4 +- qai_hub_models/models/fastsam_s/perf.yaml | 352 ++- qai_hub_models/models/fastsam_x/export.py | 4 +- qai_hub_models/models/fastsam_x/perf.yaml | 341 +- qai_hub_models/models/fcn_resnet50/perf.yaml | 268 +- .../models/fcn_resnet50_quantized/evaluate.py | 1 + .../models/fcn_resnet50_quantized/export.py | 2 +- .../models/fcn_resnet50_quantized/perf.yaml | 250 +- .../models/ffnet_122ns_lowres/perf.yaml | 266 +- qai_hub_models/models/ffnet_40s/perf.yaml | 266 +- .../models/ffnet_40s_quantized/info.yaml | 1 + .../models/ffnet_40s_quantized/perf.yaml | 190 +- qai_hub_models/models/ffnet_54s/perf.yaml | 267 +- .../models/ffnet_54s_quantized/info.yaml 
| 1 + .../models/ffnet_54s_quantized/perf.yaml | 190 +- qai_hub_models/models/ffnet_78s/perf.yaml | 272 +- .../models/ffnet_78s_lowres/perf.yaml | 268 +- .../models/ffnet_78s_quantized/info.yaml | 1 + .../models/ffnet_78s_quantized/perf.yaml | 182 +- .../models/foot_track_net/perf.yaml | 256 +- .../models/foot_track_net_quantized/perf.yaml | 272 +- .../models/gear_guard_net/info.yaml | 1 + .../models/gear_guard_net/perf.yaml | 242 +- .../models/gear_guard_net_quantized/info.yaml | 1 + .../models/gear_guard_net_quantized/perf.yaml | 286 +- qai_hub_models/models/googlenet/perf.yaml | 234 +- .../models/googlenet_quantized/evaluate.py | 1 + .../models/googlenet_quantized/export.py | 2 +- .../models/googlenet_quantized/info.yaml | 1 + .../models/googlenet_quantized/perf.yaml | 268 +- qai_hub_models/models/hrnet_pose/perf.yaml | 263 +- .../models/hrnet_pose_quantized/info.yaml | 1 + .../models/hrnet_pose_quantized/perf.yaml | 251 +- .../huggingface_wavlm_base_plus/export.py | 2 +- .../huggingface_wavlm_base_plus/model.py | 15 +- .../huggingface_wavlm_base_plus/perf.yaml | 272 +- qai_hub_models/models/inception_v3/perf.yaml | 261 +- .../models/inception_v3_quantized/evaluate.py | 1 + .../models/inception_v3_quantized/export.py | 2 +- .../models/inception_v3_quantized/info.yaml | 1 + .../models/inception_v3_quantized/perf.yaml | 248 +- qai_hub_models/models/lama_dilated/export.py | 2 +- qai_hub_models/models/lama_dilated/perf.yaml | 289 +- qai_hub_models/models/litehrnet/perf.yaml | 164 +- .../llama_v2_7b_chat_quantized/model.py | 6 +- .../models/mediapipe_face/perf.yaml | 498 +-- .../models/mediapipe_face_quantized/info.yaml | 1 + .../models/mediapipe_face_quantized/perf.yaml | 484 +-- .../models/mediapipe_hand/export.py | 4 +- .../models/mediapipe_hand/perf.yaml | 644 +++- .../models/mediapipe_pose/export.py | 4 +- .../models/mediapipe_pose/perf.yaml | 640 +++- .../models/mediapipe_selfie/perf.yaml | 246 +- qai_hub_models/models/midas/__init__.py | 5 +- 
qai_hub_models/models/midas/demo.py | 44 +- qai_hub_models/models/midas/perf.yaml | 250 +- qai_hub_models/models/midas/test.py | 6 +- .../models/midas_quantized/__init__.py | 4 +- qai_hub_models/models/midas_quantized/demo.py | 7 +- .../models/midas_quantized/export.py | 2 +- .../models/midas_quantized/info.yaml | 1 + .../models/midas_quantized/perf.yaml | 309 +- qai_hub_models/models/midas_quantized/test.py | 4 +- qai_hub_models/models/mnasnet05/perf.yaml | 259 +- qai_hub_models/models/mobile_vit/perf.yaml | 513 ++- qai_hub_models/models/mobilenet_v2/perf.yaml | 250 +- .../models/mobilenet_v2_quantized/info.yaml | 1 + .../models/mobilenet_v2_quantized/perf.yaml | 298 +- .../models/mobilenet_v3_large/perf.yaml | 244 +- .../mobilenet_v3_large_quantized/evaluate.py | 1 + .../mobilenet_v3_large_quantized/export.py | 2 +- .../mobilenet_v3_large_quantized/info.yaml | 1 + .../mobilenet_v3_large_quantized/perf.yaml | 258 +- .../models/mobilenet_v3_small/perf.yaml | 242 +- qai_hub_models/models/openai_clip/export.py | 10 +- qai_hub_models/models/openai_clip/perf.yaml | 786 +++-- qai_hub_models/models/openpose/perf.yaml | 252 +- .../models/posenet_mobilenet/perf.yaml | 242 +- .../posenet_mobilenet_quantized/perf.yaml | 226 +- .../models/quicksrnetlarge/perf.yaml | 260 +- .../quicksrnetlarge_quantized/info.yaml | 1 + .../quicksrnetlarge_quantized/perf.yaml | 301 +- .../models/quicksrnetmedium/perf.yaml | 264 +- .../quicksrnetmedium_quantized/info.yaml | 1 + .../quicksrnetmedium_quantized/perf.yaml | 302 +- .../models/quicksrnetsmall/perf.yaml | 265 +- .../quicksrnetsmall_quantized/info.yaml | 1 + .../quicksrnetsmall_quantized/perf.yaml | 272 +- .../models/real_esrgan_general_x4v3/perf.yaml | 264 +- .../models/real_esrgan_x4plus/perf.yaml | 270 +- qai_hub_models/models/regnet/perf.yaml | 252 +- .../models/regnet_quantized/evaluate.py | 1 + .../models/regnet_quantized/export.py | 2 +- .../models/regnet_quantized/info.yaml | 1 + .../models/regnet_quantized/perf.yaml | 240 +- 
qai_hub_models/models/resnet101/perf.yaml | 258 +- .../models/resnet101_quantized/evaluate.py | 1 + .../models/resnet101_quantized/export.py | 2 +- .../models/resnet101_quantized/info.yaml | 1 + .../models/resnet101_quantized/perf.yaml | 248 +- qai_hub_models/models/resnet18/perf.yaml | 241 +- .../models/resnet18_quantized/evaluate.py | 1 + .../models/resnet18_quantized/export.py | 2 +- .../models/resnet18_quantized/info.yaml | 1 + .../models/resnet18_quantized/perf.yaml | 260 +- qai_hub_models/models/resnet50/perf.yaml | 263 +- .../models/resnet50_quantized/evaluate.py | 1 + .../models/resnet50_quantized/export.py | 2 +- .../models/resnet50_quantized/info.yaml | 1 + .../models/resnet50_quantized/perf.yaml | 240 +- qai_hub_models/models/resnext101/perf.yaml | 267 +- .../models/resnext101_quantized/evaluate.py | 1 + .../models/resnext101_quantized/export.py | 2 +- .../models/resnext101_quantized/perf.yaml | 246 +- qai_hub_models/models/resnext50/perf.yaml | 252 +- .../models/resnext50_quantized/evaluate.py | 1 + .../models/resnext50_quantized/export.py | 2 +- .../models/resnext50_quantized/info.yaml | 1 + .../models/resnext50_quantized/perf.yaml | 242 +- qai_hub_models/models/sam/app.py | 289 +- qai_hub_models/models/sam/conftest.py | 2 - qai_hub_models/models/sam/demo.py | 24 +- qai_hub_models/models/sam/export.py | 32 +- qai_hub_models/models/sam/model.py | 430 ++- qai_hub_models/models/sam/model_patches.py | 536 ++++ qai_hub_models/models/sam/perf.yaml | 2765 +++++++++++++++-- qai_hub_models/models/sam/test.py | 146 +- qai_hub_models/models/sesr_m5/perf.yaml | 250 +- .../models/sesr_m5_quantized/info.yaml | 4 +- .../models/sesr_m5_quantized/perf.yaml | 302 +- qai_hub_models/models/shufflenet_v2/perf.yaml | 252 +- .../models/shufflenet_v2_quantized/info.yaml | 1 + .../models/shufflenet_v2_quantized/perf.yaml | 294 +- qai_hub_models/models/sinet/perf.yaml | 246 +- qai_hub_models/models/squeezenet1_1/perf.yaml | 242 +- .../models/squeezenet1_1_quantized/info.yaml | 
1 + .../models/squeezenet1_1_quantized/perf.yaml | 296 +- qai_hub_models/models/swin_base/perf.yaml | 268 +- qai_hub_models/models/swin_small/perf.yaml | 296 +- qai_hub_models/models/swin_tiny/perf.yaml | 275 +- qai_hub_models/models/trocr/export.py | 6 +- qai_hub_models/models/trocr/perf.yaml | 755 +++-- .../models/unet_segmentation/perf.yaml | 283 +- qai_hub_models/models/vit/evaluate.py | 1 - qai_hub_models/models/vit/export.py | 2 +- qai_hub_models/models/vit/perf.yaml | 331 +- qai_hub_models/models/vit_quantized/info.yaml | 1 + qai_hub_models/models/vit_quantized/perf.yaml | 178 +- .../models/whisper_base_en/export.py | 6 +- .../models/whisper_base_en/perf.yaml | 876 +++--- .../models/whisper_small_en/export.py | 247 -- .../models/whisper_small_en/info.yaml | 40 - .../models/whisper_small_en/model.py | 16 - .../models/whisper_small_en/perf.yaml | 845 ----- .../models/whisper_small_en/requirements.txt | 4 - .../models/whisper_small_en/test.py | 22 - .../models/whisper_tiny_en/export.py | 6 +- .../models/whisper_tiny_en/perf.yaml | 758 ++--- qai_hub_models/models/wideresnet50/perf.yaml | 250 +- .../models/wideresnet50_quantized/evaluate.py | 1 + .../models/wideresnet50_quantized/export.py | 2 +- .../models/wideresnet50_quantized/info.yaml | 1 + .../models/wideresnet50_quantized/perf.yaml | 248 +- qai_hub_models/models/xlsr/perf.yaml | 268 +- .../models/xlsr_quantized/info.yaml | 1 + .../models/xlsr_quantized/perf.yaml | 305 +- qai_hub_models/models/yolonas/perf.yaml | 269 +- .../models/yolonas_quantized/info.yaml | 1 + .../models/yolonas_quantized/perf.yaml | 240 +- qai_hub_models/models/yolov11_det/perf.yaml | 263 +- qai_hub_models/models/yolov11_seg/README.md | 63 + qai_hub_models/models/yolov11_seg/__init__.py | 10 + qai_hub_models/models/yolov11_seg/conftest.py | 39 + qai_hub_models/models/yolov11_seg/demo.py | 33 + qai_hub_models/models/yolov11_seg/export.py | 221 ++ qai_hub_models/models/yolov11_seg/info.yaml | 44 + 
qai_hub_models/models/yolov11_seg/model.py | 126 + qai_hub_models/models/yolov11_seg/perf.yaml | 347 +++ .../models/yolov11_seg/requirements.txt | 3 + qai_hub_models/models/yolov11_seg/test.py | 69 + qai_hub_models/models/yolov3/README.md | 63 + .../{whisper_small_en => yolov3}/__init__.py | 4 +- qai_hub_models/models/yolov3/app.py | 50 + .../{whisper_small_en => yolov3}/conftest.py | 2 +- qai_hub_models/models/yolov3/demo.py | 33 + qai_hub_models/models/yolov3/evaluate.py | 54 + qai_hub_models/models/yolov3/export.py | 221 ++ qai_hub_models/models/yolov3/info.yaml | 42 + qai_hub_models/models/yolov3/model.py | 134 + qai_hub_models/models/yolov3/perf.yaml | 322 ++ qai_hub_models/models/yolov3/requirements.txt | 2 + qai_hub_models/models/yolov6/perf.yaml | 265 +- qai_hub_models/models/yolov7/perf.yaml | 280 +- .../models/yolov7_quantized/evaluate.py | 1 + .../models/yolov7_quantized/export.py | 2 +- .../models/yolov7_quantized/info.yaml | 1 + .../models/yolov7_quantized/perf.yaml | 266 +- qai_hub_models/models/yolov8_det/perf.yaml | 252 +- .../models/yolov8_det_quantized/evaluate.py | 1 + .../models/yolov8_det_quantized/export.py | 2 +- .../models/yolov8_det_quantized/info.yaml | 1 + .../models/yolov8_det_quantized/perf.yaml | 268 +- qai_hub_models/models/yolov8_seg/__init__.py | 5 +- qai_hub_models/models/yolov8_seg/app.py | 203 -- qai_hub_models/models/yolov8_seg/demo.py | 75 +- qai_hub_models/models/yolov8_seg/model.py | 50 +- qai_hub_models/models/yolov8_seg/perf.yaml | 296 +- qai_hub_models/models/yolov8_seg/test.py | 14 +- qai_hub_models/scorecard/device.py | 9 + qai_hub_models/scorecard/execution_helpers.py | 4 +- qai_hub_models/scorecard/path_compile.py | 2 +- qai_hub_models/scorecard/path_profile.py | 2 +- .../scorecard/results/performance_summary.py | 352 +-- .../scorecard/results/scorecard_job.py | 363 +-- qai_hub_models/scorecard/results/yaml.py | 178 +- qai_hub_models/utils/args.py | 5 +- qai_hub_models/utils/asset_loaders.py | 15 +- 
qai_hub_models/utils/base_model.py | 24 + qai_hub_models/utils/config_loaders.py | 188 +- qai_hub_models/utils/default_export_device.py | 5 + qai_hub_models/utils/image_processing.py | 14 +- qai_hub_models/utils/system_info.py | 4 +- scripts/tasks/util.py | 6 +- 328 files changed, 30803 insertions(+), 19280 deletions(-) create mode 100644 qai_hub_models/labels/ppe_labels.txt rename qai_hub_models/models/{midas => _shared/depth_estimation}/app.py (83%) create mode 100644 qai_hub_models/models/_shared/depth_estimation/demo.py rename qai_hub_models/models/{ => _shared}/face_attrib_net/app.py (100%) create mode 100644 qai_hub_models/models/_shared/face_attrib_net/demo.py create mode 100644 qai_hub_models/models/_shared/face_attrib_net/model.py create mode 100644 qai_hub_models/models/_shared/face_detection/app.py create mode 100644 qai_hub_models/models/_shared/face_detection/demo.py create mode 100644 qai_hub_models/models/_shared/face_detection/model.py rename qai_hub_models/models/{whisper_small_en => depth_anything}/README.md (53%) create mode 100644 qai_hub_models/models/depth_anything/__init__.py create mode 100644 qai_hub_models/models/depth_anything/conftest.py create mode 100644 qai_hub_models/models/depth_anything/demo.py create mode 100644 qai_hub_models/models/depth_anything/export.py create mode 100644 qai_hub_models/models/depth_anything/info.yaml create mode 100644 qai_hub_models/models/depth_anything/model.py create mode 100644 qai_hub_models/models/depth_anything/perf.yaml create mode 100644 qai_hub_models/models/depth_anything/requirements.txt create mode 100644 qai_hub_models/models/depth_anything/test.py create mode 100644 qai_hub_models/models/depth_anything_v2/README.md create mode 100644 qai_hub_models/models/depth_anything_v2/__init__.py create mode 100644 qai_hub_models/models/depth_anything_v2/conftest.py create mode 100644 qai_hub_models/models/depth_anything_v2/demo.py create mode 100644 qai_hub_models/models/depth_anything_v2/export.py 
create mode 100644 qai_hub_models/models/depth_anything_v2/info.yaml create mode 100644 qai_hub_models/models/depth_anything_v2/model.py create mode 100644 qai_hub_models/models/depth_anything_v2/perf.yaml create mode 100644 qai_hub_models/models/depth_anything_v2/requirements.txt create mode 100644 qai_hub_models/models/depth_anything_v2/test.py create mode 100644 qai_hub_models/models/face_attrib_net_quantized/README.md create mode 100644 qai_hub_models/models/face_attrib_net_quantized/__init__.py create mode 100644 qai_hub_models/models/face_attrib_net_quantized/conftest.py rename qai_hub_models/models/{whisper_small_en => face_attrib_net_quantized}/demo.py (56%) create mode 100644 qai_hub_models/models/face_attrib_net_quantized/export.py create mode 100644 qai_hub_models/models/face_attrib_net_quantized/info.yaml create mode 100644 qai_hub_models/models/face_attrib_net_quantized/model.py create mode 100644 qai_hub_models/models/face_attrib_net_quantized/perf.yaml create mode 100644 qai_hub_models/models/face_attrib_net_quantized/test.py create mode 100644 qai_hub_models/models/face_det_lite_quantized/README.md create mode 100755 qai_hub_models/models/face_det_lite_quantized/__init__.py create mode 100644 qai_hub_models/models/face_det_lite_quantized/conftest.py create mode 100755 qai_hub_models/models/face_det_lite_quantized/demo.py create mode 100644 qai_hub_models/models/face_det_lite_quantized/export.py create mode 100755 qai_hub_models/models/face_det_lite_quantized/info.yaml create mode 100755 qai_hub_models/models/face_det_lite_quantized/model.py create mode 100644 qai_hub_models/models/face_det_lite_quantized/perf.yaml create mode 100755 qai_hub_models/models/face_det_lite_quantized/test.py create mode 100644 qai_hub_models/models/sam/model_patches.py delete mode 100644 qai_hub_models/models/whisper_small_en/export.py delete mode 100644 qai_hub_models/models/whisper_small_en/info.yaml delete mode 100644 qai_hub_models/models/whisper_small_en/model.py 
delete mode 100644 qai_hub_models/models/whisper_small_en/perf.yaml delete mode 100644 qai_hub_models/models/whisper_small_en/requirements.txt delete mode 100644 qai_hub_models/models/whisper_small_en/test.py create mode 100644 qai_hub_models/models/yolov11_seg/README.md create mode 100644 qai_hub_models/models/yolov11_seg/__init__.py create mode 100644 qai_hub_models/models/yolov11_seg/conftest.py create mode 100644 qai_hub_models/models/yolov11_seg/demo.py create mode 100644 qai_hub_models/models/yolov11_seg/export.py create mode 100644 qai_hub_models/models/yolov11_seg/info.yaml create mode 100644 qai_hub_models/models/yolov11_seg/model.py create mode 100644 qai_hub_models/models/yolov11_seg/perf.yaml create mode 100644 qai_hub_models/models/yolov11_seg/requirements.txt create mode 100644 qai_hub_models/models/yolov11_seg/test.py create mode 100644 qai_hub_models/models/yolov3/README.md rename qai_hub_models/models/{whisper_small_en => yolov3}/__init__.py (67%) create mode 100644 qai_hub_models/models/yolov3/app.py rename qai_hub_models/models/{whisper_small_en => yolov3}/conftest.py (95%) create mode 100644 qai_hub_models/models/yolov3/demo.py create mode 100644 qai_hub_models/models/yolov3/evaluate.py create mode 100644 qai_hub_models/models/yolov3/export.py create mode 100644 qai_hub_models/models/yolov3/info.yaml create mode 100644 qai_hub_models/models/yolov3/model.py create mode 100644 qai_hub_models/models/yolov3/perf.yaml create mode 100644 qai_hub_models/models/yolov3/requirements.txt delete mode 100644 qai_hub_models/models/yolov8_seg/app.py create mode 100644 qai_hub_models/utils/default_export_device.py diff --git a/README.md b/README.md index 4938bdfd..3d1bed65 100644 --- a/README.md +++ b/README.md @@ -235,6 +235,7 @@ and many more. 
| [SINet](https://aihub.qualcomm.com/models/sinet) | [qai_hub_models.models.sinet](qai_hub_models/models/sinet/README.md) | | [Segment-Anything-Model](https://aihub.qualcomm.com/models/sam) | [qai_hub_models.models.sam](qai_hub_models/models/sam/README.md) | | [Unet-Segmentation](https://aihub.qualcomm.com/models/unet_segmentation) | [qai_hub_models.models.unet_segmentation](qai_hub_models/models/unet_segmentation/README.md) | +| [YOLOv11-Segmentation](https://aihub.qualcomm.com/models/yolov11_seg) | [qai_hub_models.models.yolov11_seg](qai_hub_models/models/yolov11_seg/README.md) | | [YOLOv8-Segmentation](https://aihub.qualcomm.com/models/yolov8_seg) | [qai_hub_models.models.yolov8_seg](qai_hub_models/models/yolov8_seg/README.md) | | | | | **Object Detection** @@ -243,8 +244,10 @@ and many more. | [DETR-ResNet101-DC5](https://aihub.qualcomm.com/models/detr_resnet101_dc5) | [qai_hub_models.models.detr_resnet101_dc5](qai_hub_models/models/detr_resnet101_dc5/README.md) | | [DETR-ResNet50](https://aihub.qualcomm.com/models/detr_resnet50) | [qai_hub_models.models.detr_resnet50](qai_hub_models/models/detr_resnet50/README.md) | | [DETR-ResNet50-DC5](https://aihub.qualcomm.com/models/detr_resnet50_dc5) | [qai_hub_models.models.detr_resnet50_dc5](qai_hub_models/models/detr_resnet50_dc5/README.md) | -| [FaceAttribNet](https://aihub.qualcomm.com/models/face_attrib_net) | [qai_hub_models.models.face_attrib_net](qai_hub_models/models/face_attrib_net/README.md) | +| [Facial-Attribute-Detection](https://aihub.qualcomm.com/models/face_attrib_net) | [qai_hub_models.models.face_attrib_net](qai_hub_models/models/face_attrib_net/README.md) | +| [Facial-Attribute-Detection-Quantized](https://aihub.qualcomm.com/models/face_attrib_net_quantized) | [qai_hub_models.models.face_attrib_net_quantized](qai_hub_models/models/face_attrib_net_quantized/README.md) | | [Lightweight-Face-Detection](https://aihub.qualcomm.com/models/face_det_lite) | 
[qai_hub_models.models.face_det_lite](qai_hub_models/models/face_det_lite/README.md) | +| [Lightweight-Face-Detection-Quantized](https://aihub.qualcomm.com/models/face_det_lite_quantized) | [qai_hub_models.models.face_det_lite_quantized](qai_hub_models/models/face_det_lite_quantized/README.md) | | [MediaPipe-Face-Detection](https://aihub.qualcomm.com/models/mediapipe_face) | [qai_hub_models.models.mediapipe_face](qai_hub_models/models/mediapipe_face/README.md) | | [MediaPipe-Face-Detection-Quantized](https://aihub.qualcomm.com/models/mediapipe_face_quantized) | [qai_hub_models.models.mediapipe_face_quantized](qai_hub_models/models/mediapipe_face_quantized/README.md) | | [MediaPipe-Hand-Detection](https://aihub.qualcomm.com/models/mediapipe_hand) | [qai_hub_models.models.mediapipe_hand](qai_hub_models/models/mediapipe_hand/README.md) | @@ -257,6 +260,7 @@ and many more. | [YOLOv8-Detection-Quantized](https://aihub.qualcomm.com/models/yolov8_det_quantized) | [qai_hub_models.models.yolov8_det_quantized](qai_hub_models/models/yolov8_det_quantized/README.md) | | [Yolo-NAS](https://aihub.qualcomm.com/models/yolonas) | [qai_hub_models.models.yolonas](qai_hub_models/models/yolonas/README.md) | | [Yolo-NAS-Quantized](https://aihub.qualcomm.com/models/yolonas_quantized) | [qai_hub_models.models.yolonas_quantized](qai_hub_models/models/yolonas_quantized/README.md) | +| [Yolo-v3](https://aihub.qualcomm.com/models/yolov3) | [qai_hub_models.models.yolov3](qai_hub_models/models/yolov3/README.md) | | [Yolo-v6](https://aihub.qualcomm.com/models/yolov6) | [qai_hub_models.models.yolov6](qai_hub_models/models/yolov6/README.md) | | [Yolo-v7](https://aihub.qualcomm.com/models/yolov7) | [qai_hub_models.models.yolov7](qai_hub_models/models/yolov7/README.md) | | [Yolo-v7-Quantized](https://aihub.qualcomm.com/models/yolov7_quantized) | [qai_hub_models.models.yolov7_quantized](qai_hub_models/models/yolov7_quantized/README.md) | @@ -273,6 +277,8 @@ and many more. 
| [Posenet-Mobilenet-Quantized](https://aihub.qualcomm.com/models/posenet_mobilenet_quantized) | [qai_hub_models.models.posenet_mobilenet_quantized](qai_hub_models/models/posenet_mobilenet_quantized/README.md) | | | | | **Depth Estimation** +| [Depth-Anything](https://aihub.qualcomm.com/models/depth_anything) | [qai_hub_models.models.depth_anything](qai_hub_models/models/depth_anything/README.md) | +| [Depth-Anything-V2](https://aihub.qualcomm.com/models/depth_anything_v2) | [qai_hub_models.models.depth_anything_v2](qai_hub_models/models/depth_anything_v2/README.md) | | [Midas-V2](https://aihub.qualcomm.com/models/midas) | [qai_hub_models.models.midas](qai_hub_models/models/midas/README.md) | | [Midas-V2-Quantized](https://aihub.qualcomm.com/models/midas_quantized) | [qai_hub_models.models.midas_quantized](qai_hub_models/models/midas_quantized/README.md) | @@ -284,7 +290,6 @@ and many more. | **Speech Recognition** | [HuggingFace-WavLM-Base-Plus](https://aihub.qualcomm.com/models/huggingface_wavlm_base_plus) | [qai_hub_models.models.huggingface_wavlm_base_plus](qai_hub_models/models/huggingface_wavlm_base_plus/README.md) | | [Whisper-Base-En](https://aihub.qualcomm.com/models/whisper_base_en) | [qai_hub_models.models.whisper_base_en](qai_hub_models/models/whisper_base_en/README.md) | -| [Whisper-Small-En](https://aihub.qualcomm.com/models/whisper_small_en) | [qai_hub_models.models.whisper_small_en](qai_hub_models/models/whisper_small_en/README.md) | | [Whisper-Tiny-En](https://aihub.qualcomm.com/models/whisper_tiny_en) | [qai_hub_models.models.whisper_tiny_en](qai_hub_models/models/whisper_tiny_en/README.md) | ### Multimodal @@ -292,8 +297,8 @@ and many more. 
| Model | README | | -- | -- | | | | -| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | | [OpenAI-Clip](https://aihub.qualcomm.com/models/openai_clip) | [qai_hub_models.models.openai_clip](qai_hub_models/models/openai_clip/README.md) | +| [TrOCR](https://aihub.qualcomm.com/models/trocr) | [qai_hub_models.models.trocr](qai_hub_models/models/trocr/README.md) | ### Generative Ai diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py index e0623006..f40f8838 100644 --- a/qai_hub_models/_version.py +++ b/qai_hub_models/_version.py @@ -2,4 +2,4 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -__version__ = "0.19.1" +__version__ = "0.20.0" diff --git a/qai_hub_models/labels/ppe_labels.txt b/qai_hub_models/labels/ppe_labels.txt new file mode 100644 index 00000000..091274ee --- /dev/null +++ b/qai_hub_models/labels/ppe_labels.txt @@ -0,0 +1,2 @@ +helmet +vest diff --git a/qai_hub_models/models/midas/app.py b/qai_hub_models/models/_shared/depth_estimation/app.py similarity index 83% rename from qai_hub_models/models/midas/app.py rename to qai_hub_models/models/_shared/depth_estimation/app.py index 9d2d5e60..06a2b19e 100644 --- a/qai_hub_models/models/midas/app.py +++ b/qai_hub_models/models/_shared/depth_estimation/app.py @@ -15,7 +15,20 @@ from qai_hub_models.utils.image_processing import pil_resize_pad, undo_resize_pad -class MidasApp: +class DepthEstimationApp: + """ + This class is required to perform end to end inference for Depth Estimation + + The app uses 2 models: + * Midas + * DepthAnything + + For a given image input, the app will: + * pre-process the image (convert to range[0, 1]) + * Run DepthAnything inference + * Convert the depth into visual representation(heatmap) and return as image + """ + def __init__( self, model: 
Callable[[torch.Tensor], torch.Tensor], diff --git a/qai_hub_models/models/_shared/depth_estimation/demo.py b/qai_hub_models/models/_shared/depth_estimation/demo.py new file mode 100644 index 00000000..fe9a1f09 --- /dev/null +++ b/qai_hub_models/models/_shared/depth_estimation/demo.py @@ -0,0 +1,49 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from qai_hub_models.models._shared.depth_estimation.app import DepthEstimationApp +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, + validate_on_device_demo_args, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.display import display_or_save_image + + +# The demo will display a heatmap of the estimated depth at each point in the image. 
+def depth_estimation_demo(
+    model_cls: type[BaseModel],
+    model_id,
+    default_image: CachedWebModelAsset,
+    is_test: bool = False,
+):
+    # Stack the on-device demo options on the model CLI parser, then add --image.
+    cli = get_on_device_demo_parser(
+        get_model_cli_parser(model_cls), add_output_dir=True
+    )
+    cli.add_argument(
+        "--image", type=str, default=default_image, help="image file path or URL"
+    )
+    # Tests parse an empty argv; otherwise argparse falls back to sys.argv.
+    args = cli.parse_args([] if is_test else None)
+    model = demo_model_from_cli_args(model_cls, model_id, args)
+    validate_on_device_demo_args(args, model_id)
+
+    # The last two entries of the "image" input shape are used as height, width.
+    _, _, height, width = model_cls.get_input_spec()["image"][0]
+    input_image = load_image(args.image)
+    print("Model Loaded")
+
+    depth_app = DepthEstimationApp(model, height, width)
+    depth_heatmap = depth_app.estimate_depth(input_image)
+
+    if is_test:
+        return
+    # Outside of tests, hand the heatmap to display_or_save_image together
+    # with the requested output directory.
+    display_or_save_image(depth_heatmap, args.output_dir, "out_heatmap.png", "heatmap")
diff --git a/qai_hub_models/models/face_attrib_net/app.py b/qai_hub_models/models/_shared/face_attrib_net/app.py
similarity index 100%
rename from qai_hub_models/models/face_attrib_net/app.py
rename to qai_hub_models/models/_shared/face_attrib_net/app.py
diff --git a/qai_hub_models/models/_shared/face_attrib_net/demo.py b/qai_hub_models/models/_shared/face_attrib_net/demo.py
new file mode 100644
index 00000000..2c9a803c
--- /dev/null
+++ b/qai_hub_models/models/_shared/face_attrib_net/demo.py
@@ -0,0 +1,57 @@
+# ---------------------------------------------------------------------
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+import json
+from pathlib import Path
+
+from qai_hub_models.models._shared.face_attrib_net.app import FaceAttribNetApp
+from qai_hub_models.models.face_attrib_net.model import (
+    MODEL_ASSET_VERSION,
+    MODEL_ID,
+    OUT_NAMES,
+    FaceAttribNet,
+)
+from qai_hub_models.utils.args import (
+    demo_model_from_cli_args,
+    get_model_cli_parser,
+    get_on_device_demo_parser,
+    validate_on_device_demo_args,
+)
+from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image
+
+INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
+    MODEL_ID, MODEL_ASSET_VERSION, "img_sample.bmp"
+)
+
+
+def face_attrib_net_demo(model_cls: type[FaceAttribNet], is_test: bool = False):
+    """
+    Run ``model_cls`` on one image and write its outputs to ``output.json``.
+
+    Parameters:
+        model_cls: Model class used to build the CLI parser and the model.
+        is_test: If True, ignore ``sys.argv`` and run with the default arguments.
+    """
+    parser = get_model_cli_parser(model_cls)
+    parser = get_on_device_demo_parser(parser, add_output_dir=True)
+    parser.add_argument(
+        "--image", type=str, default=INPUT_IMAGE_ADDRESS, help="image file path or URL"
+    )
+    args = parser.parse_args([] if is_test else None)
+    model = demo_model_from_cli_args(model_cls, MODEL_ID, args)
+    validate_on_device_demo_args(args, MODEL_ID)
+
+    orig_image = load_image(args.image)
+    print("Model loaded")
+
+    output = FaceAttribNetApp(model).run_inference_on_image(orig_image)
+    out_dict = {
+        OUT_NAMES[i]: list(values.astype(float)) for i, values in enumerate(output)
+    }
+
+    output_path = (args.output_dir or str(Path() / "build")) + "/output.json"
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)  # may not exist yet
+    with open(output_path, "w", encoding="utf-8") as wf:
+        json.dump(out_dict, wf, ensure_ascii=False, indent=4)
+    print(f"Model outputs are saved at: {output_path}")
diff --git a/qai_hub_models/models/_shared/face_attrib_net/model.py b/qai_hub_models/models/_shared/face_attrib_net/model.py
new file mode 100644
index 00000000..6b95ad36
--- /dev/null
+++ 
class FaceNet(nn.Module):
    """
    Face backbone with an embedding head and optional attribute branches.

    The network is: a 1x1 conv collapsing 3 input channels to 1, a HeadBlock,
    a list of stages (each a DownsampleBlock followed by NormalBlocks) and an
    EmbedBlock. Each enabled attribute branch (liveness, openness, glasses,
    mask, sunglasses) combines the outputs of the second and third stage.

    NOTE(review): forward() indexes main_module[0..2], so blks_per_layer is
    expected to have (at least) three entries -- confirm with callers.
    """

    def __init__(
        self,
        chan,
        blks_per_layer,
        fea_only=True,
        liveness=True,
        openness=True,
        glasses=True,
        mask=True,
        sunglasses=True,
        group_size=32,
        activ_type="prelu",
    ):
        """
        Parameters:
            chan: base channel count produced by the head block.
            blks_per_layer: number of blocks in each stage; one stage is built
                per entry and the channel count doubles after every stage.
            fea_only: stored on the instance; not read anywhere in this class.
            liveness / openness / glasses / mask / sunglasses: build (and run)
                the corresponding attribute branch when truthy.
            group_size: channels per group for the grouped 3x3 convolutions of
                the attribute branches (groups = in_channels // group_size).
            activ_type: activation name forwarded to the 1x1 Conv2dBlocks of
                the attribute branches.
        """
        super().__init__()

        # 1x1 convolution without bias: 3 input channels -> 1 channel.
        self.head_converter = nn.Conv2d(
            3, 1, 1, stride=1, padding=0, groups=1, bias=False
        )
        self.chan = chan
        self.head = HeadBlock(chan)
        self.blks_per_layer = blks_per_layer
        self.fea_only = fea_only

        # One stage per entry of blks_per_layer. self.chan is mutated here and
        # ends up as chan * 2 ** len(blks_per_layer).
        self.main_module = nn.ModuleList()
        for i in range(len(self.blks_per_layer)):
            self.main_module.append(self._make_net(self.chan, self.blks_per_layer[i]))
            self.chan *= 2

        self.embed = EmbedBlock(self.chan)

        # Every branch below follows the same pattern:
        #   bran1: grouped 3x3 stride-2 conv + 1x1 conv applied to fea3
        #   concat with fea4 (base_chan * 2 + base_chan * 8 = base_chan * 10)
        #   bran2: grouped 3x3 stride-2 conv + 1x1 conv, then a small head.
        # The channel arithmetic assumes three stages -- see class NOTE.
        self.liveness = liveness
        if self.liveness:
            self.base_chan = chan
            # Unlike the other branches, the liveness grouped convs use norm="bn".
            self.liveness_bran1_gconv = Conv2dBlock(
                self.base_chan * 4,
                self.base_chan * 2,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 4 // group_size,
                norm="bn",
                activ="none",
            )
            self.liveness_bran1_conv = Conv2dBlock(
                self.base_chan * 2,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.liveness_bran2_gconv = Conv2dBlock(
                self.base_chan * 10,
                self.base_chan * 5,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 10 // group_size,
                norm="bn",
                activ="none",
            )
            self.liveness_bran2_conv = Conv2dBlock(
                self.base_chan * 5,
                self.base_chan,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            # The flattened bran2 output must have base_chan * 4 * 4 features,
            # i.e. presumably a 4x4 spatial map -- TODO confirm input size.
            self.liveness_fc = nn.Linear(self.base_chan * 4 * 4, self.base_chan // 2)

        self.openness = openness
        if self.openness:
            self.base_chan = chan
            self.openness_bran1_gconv = Conv2dBlock(
                self.base_chan * 4,
                self.base_chan * 2,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 4 // group_size,
                norm="none",
                activ="none",
            )
            self.openness_bran1_conv = Conv2dBlock(
                self.base_chan * 2,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.openness_bran2_gconv = Conv2dBlock(
                self.base_chan * 10,
                self.base_chan * 5,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 10 // group_size,
                norm="none",
                activ="none",
            )
            self.openness_bran2_conv = Conv2dBlock(
                self.base_chan * 5,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            # 4x4 average pool followed by a 2-way linear classifier.
            self.openness_ave = nn.AvgPool2d(kernel_size=4, stride=1)
            self.openness_cls = nn.Linear(self.base_chan * 2, 2)

        self.glasses = glasses
        if self.glasses:
            self.base_chan = chan
            # Note the attribute prefix is "eyeglasses", not "glasses".
            self.eyeglasses_bran1_gconv = Conv2dBlock(
                self.base_chan * 4,
                self.base_chan * 2,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 4 // group_size,
                norm="none",
                activ="none",
            )
            self.eyeglasses_bran1_conv = Conv2dBlock(
                self.base_chan * 2,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.eyeglasses_bran2_gconv = Conv2dBlock(
                self.base_chan * 10,
                self.base_chan * 5,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 10 // group_size,
                norm="none",
                activ="none",
            )
            self.eyeglasses_bran2_conv = Conv2dBlock(
                self.base_chan * 5,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.eyeglasses_ave = nn.AvgPool2d(kernel_size=4, stride=1)
            self.eyeglasses_cls = nn.Linear(self.base_chan * 2, 2)

        self.sunglasses = sunglasses
        if self.sunglasses:
            self.base_chan = chan
            self.sunglasses_bran1_gconv = Conv2dBlock(
                self.base_chan * 4,
                self.base_chan * 2,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 4 // group_size,
                norm="none",
                activ="none",
            )
            self.sunglasses_bran1_conv = Conv2dBlock(
                self.base_chan * 2,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.sunglasses_bran2_gconv = Conv2dBlock(
                self.base_chan * 10,
                self.base_chan * 5,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 10 // group_size,
                norm="none",
                activ="none",
            )
            self.sunglasses_bran2_conv = Conv2dBlock(
                self.base_chan * 5,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.sunglasses_ave = nn.AvgPool2d(kernel_size=4, stride=1)
            self.sunglasses_cls = nn.Linear(self.base_chan * 2, 2)
            # Only the sunglasses branch applies a softmax to its logits.
            self.sunglasses_softmax = nn.Softmax(dim=1)

        self.mask = mask
        if self.mask:
            self.base_chan = chan
            self.mask_bran1_gconv = Conv2dBlock(
                self.base_chan * 4,
                self.base_chan * 2,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 4 // group_size,
                norm="none",
                activ="none",
            )
            self.mask_bran1_conv = Conv2dBlock(
                self.base_chan * 2,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.mask_bran2_gconv = Conv2dBlock(
                self.base_chan * 10,
                self.base_chan * 5,
                3,
                padding=1,
                stride=2,
                group=self.base_chan * 10 // group_size,
                norm="none",
                activ="none",
            )
            self.mask_bran2_conv = Conv2dBlock(
                self.base_chan * 5,
                self.base_chan * 2,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            )
            self.mask_ave = nn.AvgPool2d(kernel_size=4, stride=1)
            self.mask_cls = nn.Linear(self.base_chan * 2, 2)

    def _make_net(self, chan, n):
        """
        Build one stage: a DownsampleBlock(chan) followed by n - 1
        NormalBlock(2 * chan) modules, wrapped in an nn.Sequential.
        """
        cnn_x = []
        cnn_x += [DownsampleBlock(chan)]
        for i in range(n - 1):
            cnn_x += [NormalBlock(2 * chan)]
        cnn_x = nn.Sequential(*cnn_x)
        return cnn_x

    def forward(self, x, target=None):
        """
        Run the backbone and every enabled attribute branch.

        Parameters:
            x: input tensor fed to the 3 -> 1 channel head converter.
            target: accepted but not used by this method.

        Returns:
            list of tensors: the embedding first, then the outputs of the
            enabled branches in the order liveness, openness, glasses, mask,
            sunglasses (note: this differs from the construction order).
        """
        x = self.head_converter(x)
        fea1 = self.head(x)

        fea2 = self.main_module[0](fea1)
        fea3 = self.main_module[1](fea2)
        fea4 = self.main_module[2](fea3)

        fr_out = self.embed(fea4)

        outputs = []
        outputs.append(fr_out)

        if self.liveness:
            a = self.liveness_bran1_conv(self.liveness_bran1_gconv(fea3))
            # Stride-2 bran1 output is concatenated with fea4 on the channel axis.
            a = torch.cat((a, fea4), dim=1)
            a = self.liveness_bran2_conv(self.liveness_bran2_gconv(a))
            a = a.flatten(start_dim=1)
            a = self.liveness_fc(a)
            outputs.append(a)

        if self.openness:
            a = self.openness_bran1_conv(self.openness_bran1_gconv(fea3))
            a = torch.cat((a, fea4), dim=1)
            a = self.openness_ave(
                self.openness_bran2_conv(self.openness_bran2_gconv(a))
            )
            a = a.flatten(start_dim=1)
            a = self.openness_cls(a)
            outputs.append(a)

        if self.glasses:
            a = self.eyeglasses_bran1_gconv(fea3)
            a = self.eyeglasses_bran1_conv(a)
            a = torch.cat((a, fea4), dim=1)
            a = self.eyeglasses_ave(
                self.eyeglasses_bran2_conv(self.eyeglasses_bran2_gconv(a))
            )
            a = a.flatten(start_dim=1)
            a = self.eyeglasses_cls(a)
            outputs.append(a)

        if self.mask:
            a = self.mask_bran1_gconv(fea3)
            a = self.mask_bran1_conv(a)
            a = torch.cat((a, fea4), dim=1)
            a = self.mask_ave(self.mask_bran2_conv(self.mask_bran2_gconv(a)))
            a = a.flatten(start_dim=1)
            a = self.mask_cls(a)
            outputs.append(a)

        if self.sunglasses:
            a = self.sunglasses_bran1_conv(self.sunglasses_bran1_gconv(fea3))
            a = torch.cat((a, fea4), dim=1)
            a = self.sunglasses_ave(
                self.sunglasses_bran2_conv(self.sunglasses_bran2_gconv(a))
            )
            a = a.flatten(start_dim=1)
            a = self.sunglasses_cls(a)
            # Softmax over the two class logits (dim=1).
            a = self.sunglasses_softmax(a)
            outputs.append(a)

        return outputs
###########################################
# HeadBlock, DownsampleBlock, NormalBlock, and EmbedBlock
##########################################
def _residual_activation(activ_type):
    """Build the activation applied after a residual sum ("prelu" or "relu")."""
    if activ_type == "prelu":
        return nn.PReLU()
    if activ_type == "relu":
        return nn.ReLU()
    # Raise instead of `assert 0`: asserts are stripped under `python -O`,
    # which would leave the block silently without an activation.
    raise ValueError(f"Unsupported activation function: {activ_type}")


# HeadBlock
class HeadBlock(nn.Module):
    """
    Stem block: a 3x3 conv from 1 channel to `chan` channels followed by two
    parallel stride-2 branches whose outputs are summed and activated.

    Parameters:
        chan: number of output channels.
        group_size: channels per group of the grouped 3x3 convolutions
            (groups = chan // group_size).
        activ_type: "prelu" or "relu".

    Raises:
        ValueError: if `activ_type` is not supported.
    """

    def __init__(self, chan, group_size=32, activ_type="prelu"):
        super().__init__()
        self.conv = Conv2dBlock(
            1, chan, 3, padding=1, stride=1, group=1, norm="bn", activ=activ_type
        )
        # Main branch: (grouped 3x3 stride 2 -> 1x1) then (grouped 3x3 -> 1x1).
        self.bran1 = nn.Sequential(
            Conv2dBlock(
                chan,
                chan,
                3,
                padding=1,
                stride=2,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ=activ_type
            ),
            Conv2dBlock(
                chan,
                chan,
                3,
                padding=1,
                stride=1,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none"
            ),
        )

        # Shortcut branch: grouped 3x3 stride 2 -> 1x1, no activation.
        self.bran2 = nn.Sequential(
            Conv2dBlock(
                chan,
                chan,
                3,
                padding=1,
                stride=2,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none"
            ),
        )

        self.activ = _residual_activation(activ_type)

    def forward(self, x):
        """Return activ(bran1(conv(x)) + bran2(conv(x)))."""
        x = self.conv(x)
        x = self.bran1(x) + self.bran2(x)
        return self.activ(x)


# DownsampleBlock
class DownsampleBlock(nn.Module):
    """
    Residual block that halves the spatial size (stride 2) and doubles the
    channel count from `chan` to `2 * chan`.

    Raises:
        ValueError: if `chan` is not a multiple of `group_size` or
            `activ_type` is not supported.
    """

    def __init__(self, chan, group_size=32, activ_type="prelu"):
        super().__init__()
        if chan % group_size != 0:
            raise ValueError(
                f"chan {chan:d} cannot be divided by group_size {group_size:d}"
            )

        self.bran1 = nn.Sequential(
            Conv2dBlock(
                chan,
                2 * chan,
                3,
                padding=1,
                stride=2,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                2 * chan,
                2 * chan,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            ),
        )

        self.bran2 = nn.Sequential(
            Conv2dBlock(
                chan,
                4 * chan,
                3,
                padding=1,
                stride=2,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                4 * chan,
                2 * chan,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ=activ_type,
            ),
            Conv2dBlock(
                2 * chan,
                4 * chan,
                3,
                padding=1,
                stride=1,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                4 * chan,
                2 * chan,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                activ="none",
            ),
        )

        self.activ = _residual_activation(activ_type)

    def forward(self, x):
        """Return activ(bran1(x) + bran2(x))."""
        x = self.bran1(x) + self.bran2(x)
        return self.activ(x)


# NormalBlock
class NormalBlock(nn.Module):
    """
    Shape-preserving residual block: activ(model(x) + x).

    Raises:
        ValueError: if `chan` is not a multiple of `group_size` or
            `activ_type` is not supported.
    """

    def __init__(self, chan, group_size=32, activ_type="prelu"):
        super().__init__()
        if chan % group_size != 0:
            raise ValueError(
                f"chan {chan:d} cannot be divided by group_size {group_size:d}"
            )
        self.model = nn.Sequential(
            Conv2dBlock(
                chan,
                2 * chan,
                3,
                padding=1,
                stride=1,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                2 * chan,
                chan,
                1,
                padding=0,
                stride=1,
                group=1,
                norm="bn",
                # Honour activ_type here as the sibling blocks do; this used
                # to be hard-coded to "prelu" (identical for the default).
                activ=activ_type,
            ),
            Conv2dBlock(
                chan,
                2 * chan,
                3,
                padding=1,
                stride=1,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                2 * chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none"
            ),
        )

        self.activ = _residual_activation(activ_type)

    def forward(self, x):
        """Return activ(model(x) + x)."""
        x = self.model(x) + x
        return self.activ(x)


class EmbedBlock(nn.Module):
    """
    Embedding head: a grouped 8x8 convolution without padding, a 1x1 conv,
    then everything after the batch dimension is flattened.
    """

    def __init__(self, chan, group_size=32, activ_type="prelu"):
        super().__init__()
        self.model = nn.Sequential(
            Conv2dBlock(
                chan,
                chan,
                8,
                padding=0,
                stride=1,
                group=chan // group_size,
                norm="none",
                activ="none",
            ),
            Conv2dBlock(
                chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ=activ_type
            ),
        )

    def forward(self, x):
        """Return the flattened output of the two convolutions."""
        x = self.model(x)
        return x.flatten(start_dim=1)


###########################################
# Basic Blocks
##########################################
class Conv2dBlock(nn.Module):
    """
    Bias-free Conv2d followed by optional BatchNorm2d and optional activation.

    Parameters:
        in_chan, out_chan, kernel_size, padding, stride: forwarded to nn.Conv2d.
        group: number of convolution groups.
        norm: "bn" for BatchNorm2d or "none".
        activ: "prelu", "relu", "sigmoid" or "none".

    Raises:
        ValueError: if `norm` or `activ` is not one of the supported names.
    """

    def __init__(
        self,
        in_chan,
        out_chan,
        kernel_size,
        padding=0,
        stride=1,
        group=1,
        norm="none",
        activ="none",
    ):
        super().__init__()
        self.conv = nn.Conv2d(
            in_chan,
            out_chan,
            kernel_size,
            stride=stride,
            padding=padding,
            groups=group,
            bias=False,
        )

        if norm == "bn":
            self.norm = nn.BatchNorm2d(out_chan)
        elif norm == "none":
            self.norm = None
        else:
            raise ValueError(f"Unsupported normalization: {norm}")

        if activ == "prelu":
            self.activ = nn.PReLU()
        elif activ == "relu":
            self.activ = nn.ReLU()
        elif activ == "sigmoid":
            self.activ = nn.Sigmoid()
        elif activ == "none":
            self.activ = None
        else:
            raise ValueError(f"Unsupported activation layer: {activ}")

    def forward(self, x):
        """Apply conv, then norm and activation when configured."""
        x = self.conv(x)
        if self.norm is not None:
            x = self.norm(x)
        if self.activ is not None:
            x = self.activ(x)
        return x
def preprocess(img: np.ndarray, height: int, width: int):
    """
    Turn an image array into the padded/resized network input.

    The array is wrapped in a tensor, two leading singleton axes are
    prepended (so a 2-D [H, W] array becomes (1, 1, H, W)), values are
    divided by 255.0, and the result is handed to `resize_pad` together with
    the target (height, width).

    Inputs:
        img: np.ndarray
            Input image.
            NOTE(review): earlier docs said [H, W, C], but the two prepended
            axes only yield an NCHW tensor for a 2-D image -- confirm with
            callers.
        height: int
            Model input height.
        width: int
            Model input width.
    Outputs:
        input: torch.Tensor
            Preprocessed model input as returned by `resize_pad`.
        scale: float
            Scaling factor of input image and network input image.
        pad: List[float]
            Top and left padding size.
    """
    scaled = torch.from_numpy(img).unsqueeze(0).unsqueeze(0) / 255.0
    net_input, scale, pad = resize_pad(scaled, (height, width))
    return net_input, scale, pad
def main(
    model_cls: type[FaceDetLite_model] = FaceDetLite_model,
    model_id: str = MODEL_ID,
    is_test: bool = False,
):
    """
    Run the face detector end-to-end on a sample image.

    Parameters:
        model_cls: model class used to build the CLI parser and load the model.
        model_id: model identifier forwarded to the CLI helpers.
        is_test: when True the process command line is ignored (parser
            defaults are used) and no output file is written.

    Side effects:
        Unless `is_test` is set, writes ``FaceDetLitebNet_output.json`` into
        ``--output-dir`` (default ``./build``), creating the directory if
        needed. The file holds the stringified inference result under the
        key "bounding box".
    """
    # Demo parameters
    parser = get_model_cli_parser(model_cls)
    parser = get_on_device_demo_parser(parser, add_output_dir=True)
    parser.add_argument(
        "--image",
        type=str,
        default=INPUT_IMAGE_ADDRESS,
        help="image file path or URL",
    )
    # None lets argparse read sys.argv; an empty list would discard every
    # option given on the command line.
    args = parser.parse_args([] if is_test else None)
    # Use the model_id argument (it defaults to MODEL_ID) rather than the
    # module constant, so callers passing another id are honoured.
    model = demo_model_from_cli_args(model_cls, model_id, args)
    validate_on_device_demo_args(args, model_id)

    # Load image
    orig_image = load_image(args.image)
    print("Model Loaded")

    app = FaceDetLiteApp(model)
    res = app.run_inference_on_image(orig_image)
    out_dict = {"bounding box": str(res)}

    if not is_test:
        output_dir = Path(args.output_dir) if args.output_dir else Path("build")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / "FaceDetLitebNet_output.json"

        with open(output_path, "w", encoding="utf-8") as wf:
            json.dump(out_dict, wf, ensure_ascii=False, indent=4)
        print(f"Model outputs are saved at: {output_path}")
class FaceDetLite_model(BaseModel):
    """
    Qualcomm face detector model.

    Detects face bounding boxes and face landmarks. The network emits three
    maps (heatmap, bbox, landmark); decoding them into final detections is
    left to the application code.
    """

    def __init__(self, model: nn.Module) -> None:
        super().__init__()
        self.model = model

    @classmethod
    def from_pretrained(cls, checkpoint_path: str | None = None):
        """
        Load FaceDetLite from a weight file created by the source FaceDetLite
        repository.

        Parameters:
            checkpoint_path: path of the checkpoint to load. When None, the
                default weights are fetched from the asset store.

        Returns:
            An instance of `cls` wrapping the loaded network on CPU.
        """
        # Only fall back to the published weights when the caller did not
        # supply a checkpoint; the argument used to be overwritten always.
        weights = (
            checkpoint_path
            if checkpoint_path is not None
            else CachedWebModelAsset.from_asset_store(
                MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS
            )
        )
        net = FaceDetLite()
        net.load_state_dict(load_torch(weights)["model_state"])
        net.to(torch.device("cpu"))

        return cls(net)

    def forward(self, image):
        """
        Run FaceDetLite on `image` and produce the raw detection maps.

        Parameters:
            image: Pixel values pre-processed for encoder consumption.
                   Range: float[0, 1]
                   1-channel gray scale image

        Returns:
            heatmap: N,C,H,W the heatmap for the person/face detection.
            bbox: N,C*4,H,W the bounding box coordinates as a map.
            landmark: N,C*10,H,W the coordinates of landmarks as a map.
        """
        return self.model(image)

    @staticmethod
    def get_input_spec(
        batch_size: int = 1,
        height: int = 480,
        width: int = 640,
    ) -> InputSpec:
        """
        Returns the input specification (name -> (shape, type)). This can be
        used to submit a profiling job on Qualcomm AI Hub.
        """
        return {"input": ((batch_size, 1, height, width), "float32")}

    @staticmethod
    def get_output_names() -> list[str]:
        """Names of the three output maps, in the order forward() returns them."""
        return ["heatmap", "bbox", "landmark"]

    @staticmethod
    def get_channel_last_inputs() -> list[str]:
        """Input names listed as channel-last."""
        return ["input"]

    @staticmethod
    def get_channel_last_outputs() -> list[str]:
        """Output names listed as channel-last."""
        return ["heatmap", "bbox", "landmark"]
class FaceDetLite(nn.Module):
    """
    FaceDetLite face detector model for face and landmark detection.
    Outputs a face heatmap, bounding box map and landmark map (5 landmarks,
    i.e. 10 channels).

    Parameters:
        wide: channel width of the intermediate layers.
        has_ext: whether to add an extension layer in the head module
            (accepted for compatibility; not read by this class).
        upmode: upsampling mode passed to the UpModules.
        act: activation function name passed to the backbone.
        RGB: whether the input is a 3 channel RGB image.
        has_se: whether the backbone uses SE modules.
        phase: "train" or "test"; in "test" the heatmap is passed through a
            sigmoid.
    """

    def __init__(
        self,
        wide: int = 32,
        has_ext: bool = False,
        upmode: str = "UCBA",
        act: str = "relu",
        RGB: bool = False,
        has_se: bool = True,
        phase: str = "train",
    ):
        super().__init__()
        self.use_rgb = RGB
        self.has_landmark = True
        # define backbone
        self.bb = Mbv3SmallFast(act, RGB, has_se)

        c1, c2 = self.bb.uplayer_shape
        # NOTE(review): everything below the backbone always uses "relu",
        # regardless of the `act` argument -- confirm this is intended.
        act = "relu"
        self.conv3 = CBAModule(
            self.bb.output_channels,
            wide,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            act=act,
        )  # s32
        self.connect1 = CBAModule(c1, wide, kernel_size=1, act=act)  # s8
        self.connect2 = CBAModule(c2, wide, kernel_size=1, act=act)  # s16

        self.up0 = UpModule(
            wide, wide, kernel_size=2, stride=2, mode=upmode, act=act
        )  # s16
        self.up1 = UpModule(
            wide, wide, kernel_size=2, stride=2, mode=upmode, act=act
        )  # s8
        self.detect = DetectModule(wide, act=act)

        self.center = HeadModule(wide, 1, act=act)
        self.box = HeadModule(wide, 4, act=act)

        if self.has_landmark:
            self.landmark = HeadModule(wide, 10, act=act)
        self.phase = phase

        # Not used in forward(); presumably kept so checkpoints containing
        # its weights still load -- TODO confirm.
        self.bridge = nn.Conv2d(
            wide * 2, wide, kernel_size=1, stride=1, padding=0, bias=False
        )

    def forward(self, input):
        """
        input: N,C,H,W (1,1,480,640) tensor of input image
        return: 3 tensors including
            heatmap: N,C,H,W (1,1,60,80)
            bbox: N,C,H,W -- earlier notes listed (1,4,120,80); all three
                heads consume the same feature map, so presumably
                (1,4,60,80) -- TODO confirm
            landmark: N,C,H,W (1,10,60,80), or None when `has_landmark` is
                false
        """

        s8_, s16_, s32_ = self.bb(input)
        s32 = self.conv3(s32_)

        # Top-down pathway: upsample and add the lateral connections.
        s16 = self.up0(s32) + self.connect2(s16_)
        s8 = self.up1(s16) + self.connect1(s8_)
        x = self.detect(s8)

        center = self.center(x)
        box = self.box(x)

        # Bind `landmark` on every path so disabling landmarks cannot end in
        # an UnboundLocalError at the return statements below.
        landmark = self.landmark(x) if self.has_landmark else None
        if self.phase == "test":
            return center.sigmoid(), box, landmark

        return center, box, landmark
class Mbv3SmallFast(nn.Module):
    """
    Small MobileNetV3-style backbone that returns three feature maps.

    Parameters:
        act: activation name; only "relu" is supported.
        RGB: True for 3-channel input, False for 1-channel (gray) input.
        has_se: choose the variant with SeModule attention in blocks 3-8.

    Raises:
        ValueError: if `act` is not "relu".
    """

    def __init__(self, act="relu", RGB=True, has_se=True):
        super().__init__()
        if act != "relu":
            # Fail fast: self.hs1 used to stay undefined for other values,
            # surfacing later as an AttributeError while building the blocks.
            raise ValueError(f"Unsupported activation function: {act}")

        # Indices of the bottleneck blocks whose outputs are returned in
        # addition to the final one.
        self.keep = [2, 7]
        # NOTE(review): these match the block widths of the has_se=True
        # variant; the has_se=False variant appears to produce 24 and 48
        # channels at the kept blocks -- confirm before using it.
        self.uplayer_shape = [32, 64]
        self.output_channels = 96

        in_channels = 3 if RGB else 1  # 1 channel for gray input
        self.conv1 = nn.Conv2d(
            in_channels, 16, kernel_size=3, stride=2, padding=1, bias=False
        )

        self.bn1 = nn.BatchNorm2d(16)
        self.hs1 = nn.ReLU(inplace=True)
        if has_se:
            self.bneck = nn.Sequential(
                Block3x3(3, 16, 16, 16, self.hs1, None, 2),  # 0 *
                Block3x3(3, 16, 64, 32, self.hs1, None, 2),  # 1
                Block3x3(3, 32, 96, 32, self.hs1, None, 1),  # 2 *
                Block3x3(5, 32, 96, 32, self.hs1, SeModule(32), 2),  # 3
                Block3x3(5, 32, 224, 32, self.hs1, SeModule(32), 1),  # 4
                Block3x3(5, 32, 224, 32, self.hs1, SeModule(32), 1),  # 5
                Block3x3(5, 32, 128, 64, self.hs1, SeModule(64), 1),  # 6
                Block3x3(5, 64, 160, 64, self.hs1, SeModule(64), 1),  # 7 *
                Block3x3(5, 64, 256, 96, self.hs1, SeModule(96), 2),  # 8
            )

        else:
            self.bneck = nn.Sequential(
                Block3x3(3, 16, 16, 16, self.hs1, None, 2),  # 0 *
                Block3x3(3, 16, 72, 24, self.hs1, None, 2),  # 1
                Block3x3(3, 24, 88, 24, self.hs1, None, 1),  # 2 *
                Block3x3(5, 24, 96, 40, self.hs1, None, 2),  # 3
                Block3x3(5, 40, 240, 40, self.hs1, None, 1),  # 4
                Block3x3(5, 40, 240, 40, self.hs1, None, 1),  # 5
                Block3x3(5, 40, 120, 48, self.hs1, None, 1),  # 6
                Block3x3(5, 48, 144, 48, self.hs1, None, 1),  # 7 *
                Block3x3(5, 48, 288, 96, self.hs1, None, 2),  # 8
            )

    def initialize_weights(self):
        """
        Randomly re-initialise all Conv2d, BatchNorm2d and Linear submodules:
        convs use N(0, sqrt(2 / (kh * kw * out_channels))), batch norms are
        reset to weight 1 / bias 0, linear layers use N(0, 0.01); biases are
        zeroed where present.
        """
        print("random init...")
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2.0 / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                # Linear layers may be built with bias=False.
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        """
        Return a list with the outputs of the blocks listed in `self.keep`
        followed by the output of the last block.
        """
        x = self.hs1(self.bn1(self.conv1(x)))
        outs = []
        for index, item in enumerate(self.bneck):
            x = item(x)

            if index in self.keep:
                outs.append(x)
        outs.append(x)
        return outs
class YoloSegmentationApp:
    """
    This class consists of light-weight "app code" that is required to perform end to end inference
    with Yolo segmentation model.

    The app works with following models:
        * YoloV8Segmentation
        * YoloV11Segmentation

    For a given image input, the app will:
        * pre-process the image (convert to range[0, 1])
        * Run Yolo inference
        * By default,
            - post-processes output using non-maximum-suppression
            - applies predicted mask on input image
    """

    def __init__(
        self,
        model: Callable[
            [torch.Tensor],
            tuple[
                list[torch.Tensor],
                list[torch.Tensor],
                list[torch.Tensor],
                list[torch.Tensor],
                torch.Tensor,
            ],
        ],
        nms_score_threshold: float = 0.45,
        nms_iou_threshold: float = 0.7,
        input_height: int = 640,
        input_width: int = 640,
    ):
        """
        Initialize a YoloSegmentationApp application.

        Parameters:
            model:
                Yolo Segmentation model

                Inputs:
                    Float32 image tensor with range [0, 1]; the app passes the
                    resized NCHW frames produced by `preprocess_input`.

                Outputs:
                    boxes: torch.Tensor
                        Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2)
                    scores: torch.Tensor
                        Class scores multiplied by confidence: Shape is [batch, num_preds]
                    masks: torch.Tensor
                        Predicted masks: Shape is [batch, num_preds, 32]
                    classes: torch.Tensor
                        Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction.
                    protos: torch.Tensor
                        Tensor of shape[batch, 32, mask_h, mask_w]
                        Multiply masks and protos to generate output masks.

            nms_score_threshold
                Score threshold for non maximum suppression.

            nms_iou_threshold
                Intersection over Union threshold for non maximum suppression.

            input_height, input_width
                Spatial size every frame is resized to before inference.
        """
        self.model = model
        self.nms_score_threshold = nms_score_threshold
        self.nms_iou_threshold = nms_iou_threshold
        self.input_height = input_height
        self.input_width = input_width

    def check_image_size(self, pixel_values: torch.Tensor) -> bool:
        """
        Return True when both spatial dimensions (the last two axes) of
        `pixel_values` are multiples of 32.
        """
        return all(s % 32 == 0 for s in pixel_values.shape[-2:])

    def preprocess_input(self, pixel_values: torch.Tensor) -> torch.Tensor:
        """Resize `pixel_values` to (input_height, input_width)."""
        img_size = (self.input_height, self.input_width)
        return Resize(img_size)(pixel_values)

    def predict(self, *args, **kwargs):
        # See predict_segmentation_from_image.
        return self.predict_segmentation_from_image(*args, **kwargs)

    def predict_segmentation_from_image(
        self,
        pixel_values_or_image: torch.Tensor
        | np.ndarray
        | Image.Image
        | list[Image.Image],
        raw_output: bool = False,
    ) -> tuple[
        list[torch.Tensor], list[torch.Tensor], torch.Tensor, list[torch.Tensor]
    ] | list[Image.Image]:
        """
        From the provided image or tensor, predict the bounding boxes, masks & classes of objects detected within.

        Parameters:
            pixel_values_or_image: torch.Tensor
                PIL image
                or
                numpy array (N H W C x uint8) or (H W C x uint8) -- both BGR channel layout
                or
                pyTorch tensor (N C H W x fp32, value range is [0, 1]), BGR channel layout

            raw_output: bool
                See "returns" doc section for details.

        Returns:
            If raw_output is true or pixel_values_or_image is a torch.Tensor, returns:
                pred_boxes: list[torch.Tensor]
                    List of predicted boxes for all the batches.
                    Each pred_box is of shape [num_boxes, 4]
                pred_scores: list[torch.Tensor]
                    List of scores for each predicted box for all the batches.
                    Each pred_score is of shape [num_boxes]
                pred_masks: torch.Tensor
                    Per-box masks resized to the spatial size of the input
                    frames; one stacked tensor covering all batches.
                pred_classes: list[torch.Tensor]
                    List of predicted class for all the batches.
                    Each pred_class is of shape [num_boxes]

            Otherwise, returns:
                image_with_masks: list[PIL.Image]
                    Input image with predicted masks applied

        Raises:
            ValueError: if the resized network input is not a multiple of 32
                in both spatial dimensions.
        """

        # Input Prep
        NHWC_int_numpy_frames, NCHW_fp32_torch_frames = app_to_net_image_inputs(
            pixel_values_or_image
        )

        # Cache input spatial dimension to use for post-processing
        input_h, input_w = NCHW_fp32_torch_frames.shape[2:]
        NCHW_fp32_torch_frames = self.preprocess_input(NCHW_fp32_torch_frames)

        # The result of the size check used to be discarded; enforce it.
        if not self.check_image_size(NCHW_fp32_torch_frames):
            raise ValueError(
                "Network input height and width must be multiples of 32, got "
                f"{tuple(NCHW_fp32_torch_frames.shape[-2:])}"
            )

        # Run prediction
        pred_boxes, pred_scores, pred_masks, pred_class_idx, proto = self.model(
            NCHW_fp32_torch_frames
        )

        # Non Maximum Suppression on each batch
        pred_boxes, pred_scores, pred_class_idx, pred_masks = batched_nms(
            self.nms_iou_threshold,
            self.nms_score_threshold,
            pred_boxes,
            pred_scores,
            pred_class_idx,
            pred_masks,
        )

        # Process mask and upsample to network input shape
        batch_masks = [
            process_mask(
                proto[batch_idx],
                pred_masks[batch_idx],
                pred_boxes[batch_idx],
                (self.input_height, self.input_width),
                upsample=True,
            )
            for batch_idx in range(len(pred_masks))
        ]

        # Resize masks to match with input image shape. Stacking the tensors
        # directly avoids a tensor -> numpy -> tensor round trip; like before,
        # this requires the same number of boxes in every batch element.
        pred_masks = F.interpolate(
            input=torch.stack(batch_masks).detach().to(torch.float32),
            size=(input_h, input_w),
            mode="bilinear",
            align_corners=False,
        )

        # Return raw output if requested
        if raw_output or isinstance(pixel_values_or_image, torch.Tensor):
            return (pred_boxes, pred_scores, pred_masks, pred_class_idx)

        # Per pixel, pick the index of the box with the highest mask value.
        pred_mask_img = torch.argmax(pred_masks, 1)

        # Overlay the segmentation masks on the image.
        color_map = create_color_map(pred_mask_img.max().item() + 1)
        out = []
        for i, img_tensor in enumerate(NHWC_int_numpy_frames):
            out.append(
                Image.blend(
                    Image.fromarray(img_tensor),
                    Image.fromarray(color_map[pred_mask_img[i]]),
                    alpha=0.5,
                )
            )
        return out
def yolo_segmentation_demo(
    model_type: type[BaseModel],
    model_id: str,
    default_image: str | CachedWebAsset,
    stride_multiple: int | None = None,
    is_test: bool = False,
):
    """
    Run a Yolo segmentation model end-to-end on one image.

    Parameters:
        model_type: model class used to build the CLI parser and load the model.
        model_id: model identifier forwarded to the CLI helpers.
        default_image: image used when --image is not given.
        stride_multiple: when set, the --image help text mentions that image
            dimensions must be multiples of this value.
        is_test: when True the process command line is ignored and the
            annotated image is neither displayed nor saved.
    """
    # Demo parameters
    parser = get_model_cli_parser(model_type)
    parser = get_on_device_demo_parser(
        parser, available_target_runtimes=[TargetRuntime.TFLITE], add_output_dir=True
    )
    image_help = "image file path or URL."
    if stride_multiple:
        image_help = f"{image_help} Image spatial dimensions (x and y) must be multiples of {stride_multiple}."

    parser.add_argument(
        "--image",
        type=str,
        default=default_image,
        # Pass the help text assembled above; it used to be built and then
        # dropped in favour of a fixed string without the stride hint.
        help=image_help,
    )
    parser.add_argument(
        "--score-threshold",
        type=float,
        default=0.45,
        help="Score threshold for NonMaximumSuppression",
    )
    parser.add_argument(
        "--iou-threshold",
        type=float,
        default=0.7,
        help="Intersection over Union (IoU) threshold for NonMaximumSuppression",
    )
    args = parser.parse_args([] if is_test else None)
    validate_on_device_demo_args(args, model_id)

    # Load image & model
    model = demo_model_from_cli_args(model_type, model_id, args)
    app = YoloSegmentationApp(model, args.score_threshold, args.iou_threshold)

    print("Model Loaded")

    image = load_image(args.image)
    # The app returns one annotated image per input frame; a single image
    # was passed, so take the first.
    image_annotated = app.predict_segmentation_from_image(image)[0]

    if not is_test:
        display_or_save_image(image_annotated, args.output_dir)
class_dtype = torch.uint8 if use_quantized_postprocessing else torch.float32 return boxes, scores, class_idx.to(class_dtype) + + +def yolo_segment_postprocess(detector_output: torch.Tensor, num_classes: int): + """ + Post processing to break Yolo Segmentation output into multiple, consumable tensors (eg. for NMS). + such as bounding boxes, scores, masks and classes. + + Parameters: + detector_output: torch.Tensor + The output of Yolo Detection model + Shape is [batch, k, num_preds] + where, k = # of classes + 4 + k is structured as follows [boxes (4) : # of classes] + and boxes are co-ordinates [x_center, y_center, w, h] + num_classes: int + number of classes + + Returns: + boxes: torch.Tensor + Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) + scores: torch.Tensor + Class scores multiplied by confidence: Shape is [batch, num_preds] + masks: torch.Tensor + Predicted masks: Shape is [batch, num_preds, 32] + class_idx: torch.Tensor + Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. + """ + # Break output into parts + detector_output = torch.permute(detector_output, [0, 2, 1]) + masks_dim = 4 + num_classes + boxes = detector_output[:, :, :4] + scores = detector_output[:, :, 4:masks_dim] + masks = detector_output[:, :, masks_dim:] + + # Convert boxes to (x1, y1, x2, y2) + boxes = transform_box_layout_xywh2xyxy(boxes) + + # Get class ID of most likely score. 
+ scores, class_idx = get_most_likely_score(scores) + + return boxes, scores, masks, class_idx diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index 2af00864..b61cdb26 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_onnx=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index 9e2c0b55..c9384c0f 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -44,14 +44,15 @@ aggregated: models: - name: AOT-GAN universal_assets: - torchscript_onnx_tflite: mqexp8xkn + torchscript_onnx_tflite: mmd3ykwon + torchscript_onnx: mn1wz28pm performance_metrics: - torchscript_onnx_tflite: - inference_time: 153257.0 - throughput: 6.52498743939918 + inference_time: 150432.0 + throughput: 6.647521803871516 estimated_peak_memory_range: - min: 3190784 - max: 37447656 + min: 3174400 + max: 31806960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -59,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jpy1q437p + job_id: j5q6l8z4p job_status: Passed torchscript_onnx_qnn: - inference_time: 153759.0 - throughput: 6.5036843371770106 + inference_time: 154539.0 + throughput: 6.470858488795709 estimated_peak_memory_range: - min: 94208 - max: 37425800 + min: 3604480 + max: 29180040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -74,7 +75,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jped8rm15 + job_id: jg9lzvxlg + job_status: Passed + torchscript_onnx: + inference_time: 103121.0 + throughput: 9.697345836444565 + estimated_peak_memory_range: + min: 172032 + max: 40223544 + primary_compute_unit: NPU + precision: 
fp16 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 276 + job_id: jp8qeo4xp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:52:13Z' + timestamp: '2024-12-12T00:22:49Z' - torchscript_onnx_tflite: - inference_time: 112139.0 - throughput: 8.917504168933199 + inference_time: 112182.0 + throughput: 8.914086038758446 estimated_peak_memory_range: - min: 1363968 - max: 64412768 + min: 1945600 + max: 64089440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -97,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp0zd1065 + job_id: jglvyno85 job_status: Passed torchscript_onnx_qnn: - inference_time: 111920.0 - throughput: 8.9349535382416 + inference_time: 112516.0 + throughput: 8.88762487112944 estimated_peak_memory_range: - min: 3354624 - max: 64106192 + min: 4272128 + max: 66412688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -112,7 +128,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jgz38xdk5 + job_id: jp14n0v2p + job_status: Passed + torchscript_onnx: + inference_time: 77594.0 + throughput: 12.887594401628991 + estimated_peak_memory_range: + min: 13021184 + max: 326167024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 276 + job_id: jgke2z92g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:52:14Z' + timestamp: '2024-12-12T00:22:50Z' - torchscript_onnx_tflite: - inference_time: 118380.0 - throughput: 8.447372867038352 + inference_time: 98596.0 + throughput: 10.14239928597509 estimated_peak_memory_range: - min: 3076096 - max: 68239632 + min: 2748416 + max: 68728384 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -135,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp8q63yxp + job_id: j56y86r0p job_status: Passed torchscript_onnx_qnn: - inference_time: 118426.0 - throughput: 8.444091669059159 + inference_time: 98906.0 + throughput: 10.110610074211879 estimated_peak_memory_range: - min: 4288512 - max: 69219008 + min: 4251648 + max: 69912832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -150,7 +181,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: j5we8de65 + job_id: jgdxdwzep + job_status: Passed + torchscript_onnx: + inference_time: 73935.0 + throughput: 13.525393927098127 + estimated_peak_memory_range: + min: 614400 + max: 150602128 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 276 + job_id: j5q6l8m4p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:52:16Z' + timestamp: '2024-12-12T00:22:51Z' - torchscript_onnx_tflite: - inference_time: 153157.0 - throughput: 6.5292477653649525 + inference_time: 153476.0 + throughput: 6.5156767181839506 estimated_peak_memory_range: - min: 3289088 - max: 31812040 + min: 3198976 + max: 39120408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -173,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgkeolx2g + job_id: jp3jzkxlg job_status: Passed torchscript_onnx_qnn: - inference_time: 102102.0 - throughput: 9.794127441186264 + inference_time: 102312.0 + throughput: 9.774024552349676 estimated_peak_memory_range: - min: 4382720 - max: 5604232 + min: 4235264 + max: 5856992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -188,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jg9lk3llg + job_id: j57yez7l5 
job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -197,28 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:52:17Z' - - torchscript_onnx_tflite: - inference_time: 3625285.0 - throughput: 0.2758403822044336 - estimated_peak_memory_range: - min: 3551232 - max: 67625424 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 235 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 235 - job_id: j5q6z7q4p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3580513.0 - throughput: 0.27928958783280494 + timestamp: '2024-12-12T00:22:39Z' + - torchscript_onnx_qnn: + inference_time: 3580504.0 + throughput: 0.2792902898586344 estimated_peak_memory_range: - min: 3604480 - max: 10782464 + min: 3117056 + max: 13586416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -226,7 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jgdx8rxep + job_id: jpxklvd15 job_status: Passed reference_device_info: name: SA7255P ADP @@ -235,13 +266,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:52:19Z' + timestamp: '2024-12-12T00:22:42Z' - torchscript_onnx_tflite: - inference_time: 153372.0 - throughput: 6.520094932582219 + inference_time: 153294.0 + throughput: 6.523412527561418 estimated_peak_memory_range: - min: 3260416 - max: 32308536 + min: 3268608 + max: 36061256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -249,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jglvo0m85 + job_id: jpv6l3ej5 job_status: Passed torchscript_onnx_qnn: - inference_time: 102120.0 - throughput: 9.792401096748923 + inference_time: 102133.0 + throughput: 9.791154670870336 estimated_peak_memory_range: - min: 4468736 - max: 5742480 + min: 4444160 + max: 5771184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -264,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 274 - job_id: j57ykjyl5 + job_id: j5mn0rdwp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -273,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:52:20Z' + timestamp: '2024-12-12T00:22:43Z' - torchscript_onnx_tflite: - inference_time: 219091.0 - throughput: 4.564313458791096 + inference_time: 219056.0 + throughput: 4.565042728799941 estimated_peak_memory_range: - min: 3256320 - max: 53684256 + min: 3276800 + max: 53895696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -287,14 +318,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j56yr340p + job_id: jgjvrxoxg job_status: Passed torchscript_onnx_qnn: - inference_time: 164549.0 - throughput: 6.077217120736073 + inference_time: 164502.0 + throughput: 6.0789534473745 estimated_peak_memory_range: - min: 1261568 - max: 7152464 + min: 4612096 + max: 10798608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -302,7 +333,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jp4lmxlv5 + job_id: jgn6z27r5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -311,13 +342,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:52:22Z' + timestamp: '2024-12-12T00:22:44Z' - torchscript_onnx_tflite: - inference_time: 153380.0 - throughput: 6.519754857217369 + inference_time: 153164.0 + throughput: 6.528949361468753 estimated_peak_memory_range: - min: 3194880 - max: 36136136 + min: 3272704 + max: 34875472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -325,14 +356,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp3jx40lg + job_id: jped79815 job_status: Passed torchscript_onnx_qnn: - inference_time: 101825.0 - throughput: 9.820770930518046 + inference_time: 101840.0 + throughput: 9.819324430479183 estimated_peak_memory_range: - min: 3395584 - max: 6651512 + min: 5238784 + max: 6686792 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -340,7 +371,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jpxk37k15 + job_id: jprvlkn9g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -349,13 +380,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:52:23Z' + timestamp: '2024-12-12T00:22:45Z' - torchscript_onnx_tflite: - inference_time: 241890.0 - throughput: 4.134110546116003 + inference_time: 241936.0 + throughput: 4.133324515574367 estimated_peak_memory_range: - min: 3256320 - max: 67631312 + min: 3153920 + max: 67479200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -363,14 +394,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgo2o16xp + job_id: jgz3le8k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 181587.0 - throughput: 5.507002153237842 + inference_time: 181637.0 + throughput: 5.505486217015256 estimated_peak_memory_range: - min: 3084288 - max: 8954064 + min: 4837376 + max: 10821040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -378,7 +409,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: j5mnownwp + job_id: jp2kr8v4p job_status: Passed reference_device_info: name: SA8775P ADP @@ -387,13 +418,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:52:24Z' + timestamp: '2024-12-12T00:22:46Z' - torchscript_onnx_tflite: - inference_time: 196585.0 - throughput: 5.086858102093242 + inference_time: 197453.0 + throughput: 5.064496361159365 estimated_peak_memory_range: - min: 3203072 - max: 55386944 + min: 3280896 + max: 55463296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -401,14 +432,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgjvo0nxg + job_id: j5welo165 job_status: Passed torchscript_onnx_qnn: - inference_time: 196144.0 - throughput: 5.098295130108491 + inference_time: 196526.0 + 
throughput: 5.088385251824186 estimated_peak_memory_range: - min: 3903488 - max: 55304496 + min: 3162112 + max: 53842560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -416,7 +447,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jgn6o96r5 + job_id: jpy1oe77p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -425,10 +456,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:52:25Z' + timestamp: '2024-12-12T00:22:48Z' - torchscript_onnx_qnn: - inference_time: 110930.0 - throughput: 9.014693951140359 + inference_time: 105198.0 + throughput: 9.505884142284074 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -439,7 +470,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 274 - job_id: jp147d42p + job_id: jp4lyq9v5 + job_status: Passed + torchscript_onnx: + inference_time: 102432.0 + throughput: 9.762574195563886 + estimated_peak_memory_range: + min: 31252480 + max: 31252480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 276 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 276 + job_id: jglvyn185 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -448,4 +494,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:52:18Z' + timestamp: '2024-12-12T00:22:52Z' diff --git a/qai_hub_models/models/beit/perf.yaml b/qai_hub_models/models/beit/perf.yaml index 0806f14b..b4b3ba8f 100644 --- a/qai_hub_models/models/beit/perf.yaml +++ b/qai_hub_models/models/beit/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Beit universal_assets: - torchscript_onnx_tflite: mmdy89y0m - torchscript_onnx: mmdy89d0m + torchscript_onnx_tflite: mq36e9llq + torchscript_onnx: mnl6v8kkn performance_metrics: - torchscript_onnx_tflite: - inference_time: 16571.0 - throughput: 60.34638826866212 + inference_time: 16988.0 + throughput: 58.865081233812106 
estimated_peak_memory_range: - min: 40960 - max: 50653936 + min: 45056 + max: 47037976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp4lmw0v5 + job_id: jgn6z2xr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 25159.0 - throughput: 39.74720775865495 + inference_time: 25138.0 + throughput: 39.78041212506962 estimated_peak_memory_range: - min: 40960 - max: 49579840 + min: 12288 + max: 49586312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j5q6z2o4p + job_id: jp3jzk8lg job_status: Passed torchscript_onnx: - inference_time: 18196.0 - throughput: 54.957133435919985 + inference_time: 18162.0 + throughput: 55.06001541680432 estimated_peak_memory_range: - min: 69632 - max: 203332560 + min: 61440 + max: 203180768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 373 - job_id: jp147dz2p + job_id: jpxklv315 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:51:31Z' + timestamp: '2024-12-12T00:21:53Z' - torchscript_onnx_tflite: - inference_time: 11735.0 - throughput: 85.2151682999574 + inference_time: 11722.0 + throughput: 85.30967411704488 estimated_peak_memory_range: - min: 45056 - max: 64800640 + min: 20480 + max: 62354752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jpxk31215 + job_id: jprvlk99g job_status: Passed torchscript_onnx_qnn: - inference_time: 17586.0 - throughput: 56.86341407938133 + inference_time: 17465.0 + throughput: 57.2573718866304 estimated_peak_memory_range: - min: 622592 - max: 63140976 + min: 221184 + max: 65553136 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jglvokr85 + job_id: jgo2lymxp job_status: Passed torchscript_onnx: - inference_time: 13816.0 - throughput: 72.37984944991314 + inference_time: 13876.0 + throughput: 72.06687806284232 estimated_peak_memory_range: min: 0 - max: 162712976 + max: 162901136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 373 - job_id: jgdx8r1ep + job_id: j5mn0rowp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:51:32Z' + timestamp: '2024-12-12T00:21:54Z' - torchscript_onnx_tflite: - inference_time: 9505.0 - throughput: 105.20778537611783 + inference_time: 11467.0 + throughput: 87.20676724513822 estimated_peak_memory_range: - min: 16384 - max: 64241872 + min: 12288 + max: 65145552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j5mnozywp + job_id: jp2kr8j4p job_status: Passed torchscript_onnx_qnn: - inference_time: 14852.0 - throughput: 67.330999192028 + inference_time: 17864.0 + throughput: 55.97850425436632 estimated_peak_memory_range: - min: 614400 - max: 66996256 + min: 638976 + max: 67815904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j56yr1l0p + job_id: jpv6l34j5 job_status: Passed torchscript_onnx: - inference_time: 13499.0 - throughput: 74.07956144899622 + inference_time: 13643.0 + throughput: 73.29766180458843 estimated_peak_memory_range: - min: 663552 - max: 110849312 + min: 454656 + max: 110516368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 373 - job_id: j57ykjrl5 + job_id: jgn6z2or5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:51:33Z' + timestamp: '2024-12-12T00:21:55Z' - torchscript_onnx_tflite: - inference_time: 16620.0 - throughput: 60.16847172081829 + inference_time: 16966.0 + throughput: 58.94141223623718 estimated_peak_memory_range: - min: 36864 - max: 45447880 + min: 40960 + max: 46554112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jgn6oe8r5 + job_id: jpy1oen7p job_status: Passed torchscript_onnx_qnn: - inference_time: 17346.0 - throughput: 57.65017871555402 + inference_time: 17588.0 + throughput: 56.856947919035704 estimated_peak_memory_range: - min: 671744 - max: 2293088 + min: 712704 + max: 1889656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jp3jxm2lg + job_id: jgjvrx1xg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:51:21Z' + timestamp: '2024-12-12T00:21:44Z' - torchscript_onnx_tflite: - inference_time: 260883.0 - throughput: 3.8331359268331013 + inference_time: 260912.0 + throughput: 3.8327098791929846 estimated_peak_memory_range: - min: 102400 - max: 64153152 + min: 114688 + max: 63671024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jprvoyj9g + job_id: jp0zmyk65 job_status: Passed torchscript_onnx_qnn: - inference_time: 268695.0 - throughput: 3.7216918811291615 + inference_time: 268783.0 + throughput: 3.7204733930345295 estimated_peak_memory_range: - min: 757760 - max: 7940608 + min: 647168 
+ max: 11776192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jpv6ewxj5 + job_id: jgz3lewk5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:51:24Z' + timestamp: '2024-12-12T00:21:46Z' - torchscript_onnx_tflite: - inference_time: 16871.0 - throughput: 59.273309228854245 + inference_time: 16999.0 + throughput: 58.82698982293076 estimated_peak_memory_range: - min: 49152 - max: 47687816 + min: 40960 + max: 50505496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp2k4mn4p + job_id: jp8qeo8xp job_status: Passed torchscript_onnx_qnn: - inference_time: 17457.0 - throughput: 57.28361115884746 + inference_time: 17516.0 + throughput: 57.09065996802923 estimated_peak_memory_range: - min: 663552 - max: 2349960 + min: 696320 + max: 2059864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jgjvol4xg + job_id: j5welo865 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:51:25Z' + timestamp: '2024-12-12T00:21:47Z' - torchscript_onnx_tflite: - inference_time: 24531.0 - throughput: 40.764746647099585 + inference_time: 24511.0 + throughput: 40.79800905715801 estimated_peak_memory_range: - min: 40960 - max: 57967920 + min: 65536 + max: 58546560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jpy1qd07p + job_id: jgke2zd2g job_status: Passed torchscript_onnx_qnn: - inference_time: 30244.0 - throughput: 33.064409469646876 + inference_time: 30220.0 + 
throughput: 33.090668431502316 estimated_peak_memory_range: - min: 712704 - max: 6592544 + min: 688128 + max: 6830320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jped8v315 + job_id: jg9lzvklg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:51:26Z' + timestamp: '2024-12-12T00:21:49Z' - torchscript_onnx_tflite: - inference_time: 16994.0 - throughput: 58.84429798752501 + inference_time: 16938.0 + throughput: 59.0388475616956 estimated_peak_memory_range: - min: 40960 - max: 42790400 + min: 20480 + max: 44679104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp0zdr765 + job_id: j5q6l8w4p job_status: Passed torchscript_onnx_qnn: - inference_time: 17383.0 - throughput: 57.52746936662256 + inference_time: 17502.0 + throughput: 57.13632727688264 estimated_peak_memory_range: - min: 643072 - max: 1960256 + min: 659456 + max: 1962968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jgz387kk5 + job_id: jp14n072p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:51:27Z' + timestamp: '2024-12-12T00:21:50Z' - torchscript_onnx_tflite: - inference_time: 24504.0 - throughput: 40.80966372837088 + inference_time: 24485.0 + throughput: 40.84133142740453 estimated_peak_memory_range: - min: 258048 - max: 63829600 + min: 40960 + max: 63091888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jp8q67vxp + job_id: jglvyn785 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 25410.0 - throughput: 39.354584809130266 + inference_time: 25383.0 + throughput: 39.396446440531065 estimated_peak_memory_range: - min: 0 - max: 5649824 + min: 684032 + max: 11623744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j5we8d665 + job_id: jgdxdw8ep job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:51:29Z' + timestamp: '2024-12-12T00:21:51Z' - torchscript_onnx_tflite: - inference_time: 21925.0 - throughput: 45.61003420752566 + inference_time: 21866.0 + throughput: 45.7331016189518 estimated_peak_memory_range: - min: 36864 - max: 59127904 + min: 0 + max: 60725936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jgkeoym2g + job_id: j56y86v0p job_status: Passed torchscript_onnx_qnn: - inference_time: 31480.0 - throughput: 31.76620076238882 + inference_time: 31491.0 + throughput: 31.755104633069767 estimated_peak_memory_range: min: 647168 - max: 59758864 + max: 59550144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jg9lk3nlg + job_id: j57yezkl5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:51:30Z' + timestamp: '2024-12-12T00:21:52Z' - torchscript_onnx_qnn: - inference_time: 18476.0 - throughput: 54.12426932236415 + inference_time: 18520.0 + throughput: 53.99568034557235 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,22 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jgo2ovqxp + job_id: jped79215 job_status: Passed 
torchscript_onnx: - inference_time: 21662.0 - throughput: 46.163789123811284 + inference_time: 22888.0 + throughput: 43.69101712687871 estimated_peak_memory_range: - min: 194805760 - max: 194805760 + min: 194719744 + max: 194719744 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 373 + layers_on_npu: 9 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 373 - job_id: jp4lmxrv5 + total_layers: 9 + job_id: jprvlko9g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:51:34Z' + timestamp: '2024-12-12T00:21:56Z' diff --git a/qai_hub_models/models/conditional_detr_resnet50/perf.yaml b/qai_hub_models/models/conditional_detr_resnet50/perf.yaml index e1948167..ef042fa7 100644 --- a/qai_hub_models/models/conditional_detr_resnet50/perf.yaml +++ b/qai_hub_models/models/conditional_detr_resnet50/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Conditional-DETR-ResNet50 universal_assets: - torchscript_onnx_tflite: mnzv4o3pq - torchscript_onnx: mm5dwev2n + torchscript_onnx_tflite: mmxe7r1rn + torchscript_onnx: mqyv39kvq performance_metrics: - torchscript_onnx_tflite: - inference_time: 22146.0 - throughput: 45.154881242662334 + inference_time: 22106.0 + throughput: 45.236587351850176 estimated_peak_memory_range: - min: 135168 - max: 23102592 + min: 126976 + max: 23810040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jgdx8vwlp + job_id: jg9lzv9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 22716.0 - throughput: 44.02183483007572 + inference_time: 22351.0 + throughput: 44.74072748422889 estimated_peak_memory_range: - min: 2818048 - max: 25951448 + min: 2781184 + max: 24618888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 891 - job_id: jprvoyq9g + job_id: jpy1oev7p job_status: Passed torchscript_onnx: - inference_time: 19125.0 - throughput: 52.287581699346404 + inference_time: 19411.0 + throughput: 51.51718097985678 estimated_peak_memory_range: - min: 3063808 - max: 5307848 + min: 3092480 + max: 5691168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 843 - job_id: jpv6ewoj5 + job_id: jgz3levk5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:50:38Z' + timestamp: '2024-12-12T00:21:01Z' - torchscript_onnx_tflite: - inference_time: 16694.0 - throughput: 59.901761111776686 + inference_time: 16353.0 + throughput: 61.15085916957133 estimated_peak_memory_range: - min: 20480 - max: 77854176 + min: 147456 + max: 79206752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: j5we89v65 + job_id: jp14n0q2p job_status: Passed torchscript_onnx_qnn: - inference_time: 16763.0 - throughput: 59.6551929845493 + inference_time: 16603.0 + throughput: 60.23007890140336 estimated_peak_memory_range: - min: 2801664 - max: 76293264 + min: 208527360 + max: 283336816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jp2k4m64p + job_id: jp0zmye65 job_status: Passed torchscript_onnx: - inference_time: 14517.0 - throughput: 68.88475580354067 + inference_time: 14729.0 + throughput: 67.89327177676692 estimated_peak_memory_range: - min: 3039232 - max: 309736464 + min: 183341056 + max: 492286560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 843 - job_id: jgjvolmxg + job_id: j5welox65 job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:50:39Z' + timestamp: '2024-12-12T00:21:02Z' - torchscript_onnx_tflite: - inference_time: 13280.0 - throughput: 75.3012048192771 + inference_time: 14970.0 + throughput: 66.8002672010688 estimated_peak_memory_range: min: 122880 - max: 79855808 + max: 80122528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jg9lk41lg + job_id: jgdxdw7ep job_status: Passed torchscript_onnx_qnn: - inference_time: 13429.0 - throughput: 74.46570854121677 + inference_time: 13452.0 + throughput: 74.3383883437407 estimated_peak_memory_range: - min: 2781184 - max: 80942016 + min: 2777088 + max: 82193216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jpy1qdw7p + job_id: jp8qeowxp job_status: Passed torchscript_onnx: - inference_time: 13245.0 - throughput: 75.50018875047188 + inference_time: 13266.0 + throughput: 75.38067239559777 estimated_peak_memory_range: min: 0 - max: 147948784 + max: 149773920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 843 - job_id: jped8v115 + job_id: jg9lzv8lg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:50:40Z' + timestamp: '2024-12-12T00:21:03Z' - torchscript_onnx_tflite: - inference_time: 22072.0 - throughput: 45.30627038782168 + inference_time: 22186.0 + throughput: 45.073469755701794 estimated_peak_memory_range: - min: 32768 - max: 24683880 + min: 155648 + max: 24160960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jp1478l2p + job_id: j57yezvl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17370.0 - throughput: 57.570523891767415 + inference_time: 17751.0 + throughput: 56.334854374401445 estimated_peak_memory_range: - min: 2850816 - max: 4084088 + min: 2830336 + max: 4045120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jp0zdrq65 + job_id: jgke2zr2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:50:28Z' + timestamp: '2024-12-12T00:20:51Z' - torchscript_onnx_tflite: - inference_time: 350022.0 - throughput: 2.8569632765940427 + inference_time: 349877.0 + throughput: 2.858147291762534 estimated_peak_memory_range: - min: 155648 - max: 78607008 + min: 16384 + max: 78903712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jgdx8v9ep + job_id: jp4lyqjv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 346775.0 - throughput: 2.8837142239204097 + inference_time: 346721.0 + throughput: 2.8841633474753476 estimated_peak_memory_range: - min: 1949696 - max: 7638896 + min: 2625536 + max: 12542912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jgkeoyn2g + job_id: jglvyne85 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:50:31Z' + timestamp: '2024-12-12T00:20:53Z' - torchscript_onnx_tflite: - inference_time: 22070.0 - throughput: 45.31037607612143 + inference_time: 21977.0 + throughput: 45.50211584838695 estimated_peak_memory_range: - min: 16384 - max: 23150848 + 
min: 135168 + max: 24035080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: j57ykdwl5 + job_id: jpxklve15 job_status: Passed torchscript_onnx_qnn: - inference_time: 17488.0 - throughput: 57.18206770356816 + inference_time: 17439.0 + throughput: 57.342737542290266 estimated_peak_memory_range: - min: 2859008 - max: 4605904 + min: 2846720 + max: 4436816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: j5q6z2k4p + job_id: j56y86q0p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:50:32Z' + timestamp: '2024-12-12T00:20:54Z' - torchscript_onnx_tflite: - inference_time: 29662.0 - throughput: 33.71316836356281 + inference_time: 29740.0 + throughput: 33.62474781439139 estimated_peak_memory_range: min: 126976 - max: 63323552 + max: 64616608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jp4lmwov5 + job_id: j5mn0rvwp job_status: Passed torchscript_onnx_qnn: - inference_time: 24063.0 - throughput: 41.55757802435274 + inference_time: 24038.0 + throughput: 41.600798735335715 estimated_peak_memory_range: - min: 126976 - max: 5959232 + min: 2801664 + max: 9189904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jglvokz85 + job_id: jp3jzkqlg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:50:33Z' + timestamp: '2024-12-12T00:20:56Z' - torchscript_onnx_tflite: - inference_time: 22203.0 - throughput: 45.03895869927487 + inference_time: 22406.0 + 
throughput: 44.63090243684727 estimated_peak_memory_range: - min: 167936 - max: 23528632 + min: 135168 + max: 22936976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jpxk31j15 + job_id: jgn6z2rr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17645.0 - throughput: 56.67327854916407 + inference_time: 17510.0 + throughput: 57.110222729868646 estimated_peak_memory_range: - min: 2842624 - max: 4142152 + min: 2813952 + max: 4133792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: j56yr1j0p + job_id: jgo2lyexp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:50:34Z' + timestamp: '2024-12-12T00:20:57Z' - torchscript_onnx_tflite: - inference_time: 31379.0 - throughput: 31.868447050575224 + inference_time: 31532.0 + throughput: 31.713814537612585 estimated_peak_memory_range: - min: 131072 - max: 78622992 + min: 233472 + max: 79445504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: j5mnoz2wp + job_id: jprvlk19g job_status: Passed torchscript_onnx_qnn: - inference_time: 25504.0 - throughput: 39.20953575909661 + inference_time: 25617.0 + throughput: 39.036577272904715 estimated_peak_memory_range: - min: 110592 - max: 5918672 + min: 2412544 + max: 8270128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jp3jxm3lg + job_id: jpv6l3zj5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:50:36Z' + timestamp: '2024-12-12T00:20:58Z' - 
torchscript_onnx_tflite: - inference_time: 26539.0 - throughput: 37.68039489053845 + inference_time: 25790.0 + throughput: 38.774718883288095 estimated_peak_memory_range: - min: 16384 - max: 63153760 + min: 155648 + max: 61402448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 843 - job_id: jgn6oeyr5 + job_id: jp2kr834p job_status: Passed torchscript_onnx_qnn: - inference_time: 27837.0 - throughput: 35.92341128713583 + inference_time: 27662.0 + throughput: 36.15067601764153 estimated_peak_memory_range: - min: 2535424 - max: 62521408 + min: 2781184 + max: 63583472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jgo2ov0xp + job_id: jgjvrxkxg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:50:37Z' + timestamp: '2024-12-12T00:20:59Z' - torchscript_onnx_qnn: - inference_time: 17289.0 - throughput: 57.84024524263983 + inference_time: 17298.0 + throughput: 57.810151462596835 estimated_peak_memory_range: min: 2768896 max: 2768896 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 891 - job_id: jp8q679xp + job_id: j5q6l894p job_status: Passed torchscript_onnx: - inference_time: 20551.0 - throughput: 48.65943263101552 + inference_time: 20510.0 + throughput: 48.75670404680643 estimated_peak_memory_range: - min: 87568384 - max: 87568384 + min: 87457792 + max: 87457792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 843 - job_id: jgz3879k5 + job_id: jp14n032p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:50:41Z' 
+ timestamp: '2024-12-12T00:21:04Z' diff --git a/qai_hub_models/models/controlnet_quantized/export.py b/qai_hub_models/models/controlnet_quantized/export.py index 222fb205..27425f10 100644 --- a/qai_hub_models/models/controlnet_quantized/export.py +++ b/qai_hub_models/models/controlnet_quantized/export.py @@ -25,10 +25,10 @@ ) ALL_COMPONENTS = [ + "ControlNet_Quantized", "TextEncoder_Quantized", "UNet_Quantized", "VAEDecoder_Quantized", - "ControlNet_Quantized", ] DEFAULT_COMPONENTS = [ "TextEncoder_Quantized", @@ -116,14 +116,14 @@ def export_model( print("Initializing model class") model = Model.from_precompiled() components_dict: dict[str, BasePrecompiledModel] = {} + if "ControlNet_Quantized" in components: + components_dict["ControlNet_Quantized"] = model.controlnet # type: ignore if "TextEncoder_Quantized" in components: components_dict["TextEncoder_Quantized"] = model.text_encoder # type: ignore if "UNet_Quantized" in components: components_dict["UNet_Quantized"] = model.unet # type: ignore if "VAEDecoder_Quantized" in components: components_dict["VAEDecoder_Quantized"] = model.vae_decoder # type: ignore - if "ControlNet_Quantized" in components: - components_dict["ControlNet_Quantized"] = model.controlnet # type: ignore # 2. 
Upload model assets to hub print("Uploading model assets on hub") diff --git a/qai_hub_models/models/convnext_base/perf.yaml b/qai_hub_models/models/convnext_base/perf.yaml index d0f5292a..059a9298 100644 --- a/qai_hub_models/models/convnext_base/perf.yaml +++ b/qai_hub_models/models/convnext_base/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ConvNext-Base universal_assets: - torchscript_onnx_tflite: mn1z8wl8m - torchscript_onnx: mn1z8w48m + torchscript_onnx_tflite: mq36e9olq + torchscript_onnx: mq8dkwwzm performance_metrics: - torchscript_onnx_tflite: - inference_time: 7715.0 - throughput: 129.61762799740765 + inference_time: 7766.0 + throughput: 128.76641771825908 estimated_peak_memory_range: - min: 16384 - max: 29547992 + min: 20480 + max: 22678992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 598 - job_id: jpv6ewjm5 + job_id: jgo2lyvdp job_status: Passed torchscript_onnx_qnn: - inference_time: 8470.0 - throughput: 118.06375442739079 + inference_time: 8507.0 + throughput: 117.55025273304338 estimated_peak_memory_range: - min: 0 - max: 25258136 + min: 24576 + max: 24724664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: j5mnozrqp + job_id: jp4lyqxl5 job_status: Passed torchscript_onnx: - inference_time: 37197.0 - throughput: 26.88388848563056 + inference_time: 37994.0 + throughput: 26.319945254513872 estimated_peak_memory_range: - min: 90112 - max: 202770968 + min: 61440 + max: 203417344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 360 - job_id: jp3jxmkzg + job_id: jgo2ly1dp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:49:36Z' + timestamp: 
'2024-12-12T00:19:59Z' - torchscript_onnx_tflite: - inference_time: 5793.0 - throughput: 172.62213015708613 + inference_time: 5756.0 + throughput: 173.73175816539262 estimated_peak_memory_range: min: 20480 - max: 64365312 + max: 64814592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 598 - job_id: jgjvolj8g + job_id: jpv6l3wm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6173.0 - throughput: 161.99578810950916 + inference_time: 6100.0 + throughput: 163.9344262295082 estimated_peak_memory_range: min: 0 - max: 60478976 + max: 61268272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: jgn6oe2m5 + job_id: jpxklv795 job_status: Passed torchscript_onnx: - inference_time: 26460.0 - throughput: 37.79289493575208 + inference_time: 26476.0 + throughput: 37.77005589968273 estimated_peak_memory_range: min: 0 - max: 772648736 + max: 775420688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 360 - job_id: jgo2ovydp + job_id: jgjvrx08g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:49:37Z' + timestamp: '2024-12-12T00:20:01Z' - torchscript_onnx_tflite: - inference_time: 4982.0 - throughput: 200.7226013649137 + inference_time: 4357.0 + throughput: 229.51572182694514 estimated_peak_memory_range: min: 12288 - max: 73715136 + max: 74801728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 598 - job_id: jped8vj05 + job_id: jgjvrxl8g job_status: Passed torchscript_onnx_qnn: - inference_time: 5558.0 - throughput: 179.92083483267362 + inference_time: 5549.0 + throughput: 180.21265092809514 
estimated_peak_memory_range: min: 0 - max: 74405184 + max: 76400496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: jprvoykeg + job_id: j5mn0rwqp job_status: Passed torchscript_onnx: - inference_time: 23367.0 - throughput: 42.79539521547481 + inference_time: 23401.0 + throughput: 42.73321652920816 estimated_peak_memory_range: min: 0 - max: 174746816 + max: 176885328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 360 - job_id: jpv6ew3m5 + job_id: jped79r05 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:49:38Z' + timestamp: '2024-12-12T00:20:02Z' - torchscript_onnx_tflite: - inference_time: 7749.0 - throughput: 129.0489095367144 + inference_time: 7775.0 + throughput: 128.61736334405145 estimated_peak_memory_range: - min: 16384 - max: 26174296 + min: 36864 + max: 23699312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 598 - job_id: jgz387165 + job_id: jped79v05 job_status: Passed torchscript_onnx_qnn: - inference_time: 8103.0 - throughput: 123.4110823151919 + inference_time: 8178.0 + throughput: 122.2792858889704 estimated_peak_memory_range: - min: 647168 - max: 1984848 + min: 643072 + max: 2203544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: jp2k4m8mp + job_id: jgn6z29m5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:49:26Z' + timestamp: '2024-12-12T00:19:50Z' - torchscript_onnx_tflite: - inference_time: 20154.0 - throughput: 
49.61794184777215 + inference_time: 20221.0 + throughput: 49.45353840067257 estimated_peak_memory_range: - min: 49152 - max: 57310176 + min: 40960 + max: 52950608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 598 - job_id: jp4lmwql5 + job_id: j57yezjr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 21037.0 - throughput: 47.535294956505204 + inference_time: 20997.0 + throughput: 47.6258513120922 estimated_peak_memory_range: min: 0 - max: 57343344 + max: 55884416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: j56yr167p + job_id: j56y8637p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -281,10 +281,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:49:35Z' + timestamp: '2024-12-12T00:19:58Z' - torchscript_onnx_qnn: - inference_time: 8581.0 - throughput: 116.53653420347278 + inference_time: 8964.0 + throughput: 111.55734047300312 estimated_peak_memory_range: min: 602112 max: 602112 @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 430 - job_id: jpy1qde4p + job_id: jprvlk4eg job_status: Passed torchscript_onnx: - inference_time: 41106.0 - throughput: 24.327348805527173 + inference_time: 41036.0 + throughput: 24.368846866166294 estimated_peak_memory_range: - min: 183599104 - max: 183599104 + min: 184664064 + max: 184664064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 360 - job_id: jgjvolx8g + job_id: jgz3lex65 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -319,4 +319,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:49:39Z' + timestamp: '2024-12-12T00:20:03Z' diff --git 
a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index aa433372..07e71783 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ConvNext-Tiny universal_assets: - torchscript_onnx_tflite: mqpzy3j0n - torchscript_onnx: mmx7zegjq + torchscript_onnx_tflite: mngg1y85n + torchscript_onnx: mnj4xz7kn performance_metrics: - torchscript_onnx_tflite: - inference_time: 3346.0 - throughput: 298.86431560071725 + inference_time: 3367.0 + throughput: 297.000297000297 estimated_peak_memory_range: - min: 36864 - max: 271223880 + min: 20480 + max: 292326848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jp8q67n8p + job_id: jgn6z23m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3949.0 - throughput: 253.2286654849329 + inference_time: 3968.0 + throughput: 252.01612903225808 estimated_peak_memory_range: - min: 643072 - max: 292175192 + min: 618496 + max: 271594480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgz387665 + job_id: jp3jzkezg job_status: Passed torchscript_onnx: - inference_time: 13377.0 - throughput: 74.75517679599312 + inference_time: 13448.0 + throughput: 74.360499702558 estimated_peak_memory_range: - min: 16384 - max: 68349440 + min: 0 + max: 69189752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp2k4m2mp + job_id: jp4lyqwl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:48:42Z' + timestamp: '2024-12-12T00:19:06Z' - torchscript_onnx_tflite: - inference_time: 2447.0 - throughput: 
408.6636697997548 + inference_time: 2473.0 + throughput: 404.36716538617065 estimated_peak_memory_range: min: 16384 - max: 36823008 + max: 36814864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jgkeoy1og + job_id: jprvlkeeg job_status: Passed torchscript_onnx_qnn: - inference_time: 2820.0 - throughput: 354.6099290780142 + inference_time: 2799.0 + throughput: 357.27045373347624 estimated_peak_memory_range: min: 618496 - max: 37241152 + max: 36862512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: j5we89kj5 + job_id: jgo2ly3dp job_status: Passed torchscript_onnx: - inference_time: 9597.0 - throughput: 104.19922892570595 + inference_time: 9555.0 + throughput: 104.65724751439038 estimated_peak_memory_range: - min: 335872 - max: 387590384 + min: 0 + max: 388727824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpy1qd94p + job_id: j5mn0rzqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:48:43Z' + timestamp: '2024-12-12T00:19:08Z' - torchscript_onnx_tflite: - inference_time: 1846.0 - throughput: 541.7118093174431 + inference_time: 1819.0 + throughput: 549.7526113249038 estimated_peak_memory_range: min: 12288 - max: 38795760 + max: 40367872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: j5q6z2nmp + job_id: jp2kr8lmp job_status: Passed torchscript_onnx_qnn: - inference_time: 2445.0 - throughput: 408.9979550102249 + inference_time: 2523.0 + throughput: 396.3535473642489 estimated_peak_memory_range: min: 0 - max: 38475104 + max: 39754304 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jg9lk4rvg + job_id: jpv6l3vm5 job_status: Passed torchscript_onnx: - inference_time: 7291.0 - throughput: 137.1553970648745 + inference_time: 8445.0 + throughput: 118.41326228537596 estimated_peak_memory_range: - min: 647168 - max: 132704816 + min: 626688 + max: 134244240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp0zdrne5 + job_id: jprvlkyeg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:48:45Z' + timestamp: '2024-12-12T00:19:09Z' - torchscript_onnx_tflite: - inference_time: 3342.0 - throughput: 299.22202274087374 + inference_time: 3337.0 + throughput: 299.6703626011387 estimated_peak_memory_range: min: 20480 - max: 334076168 + max: 344801688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jglvokdl5 + job_id: jpy1oe64p job_status: Passed torchscript_onnx_qnn: - inference_time: 3641.0 - throughput: 274.64982147761606 + inference_time: 3639.0 + throughput: 274.80076944215443 estimated_peak_memory_range: - min: 626688 - max: 1848136 + min: 638976 + max: 2175768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jp14789lp + job_id: jgjvrxe8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:48:33Z' + timestamp: '2024-12-12T00:18:57Z' - torchscript_onnx_tflite: - inference_time: 96726.0 - throughput: 10.338481897318198 + inference_time: 96765.0 + throughput: 
10.334315093267193 estimated_peak_memory_range: - min: 49152 - max: 36969056 + min: 73728 + max: 38025392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: j56yr1x7p + job_id: jp0zmyle5 job_status: Passed torchscript_onnx_qnn: - inference_time: 97255.0 - throughput: 10.282247699347078 + inference_time: 97253.0 + throughput: 10.282459152931015 estimated_peak_memory_range: - min: 1089536 - max: 8159904 + min: 765952 + max: 11383408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: j57ykdqr5 + job_id: jgz3ler65 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:48:35Z' + timestamp: '2024-12-12T00:18:59Z' - torchscript_onnx_tflite: - inference_time: 3344.0 - throughput: 299.0430622009569 + inference_time: 3378.0 + throughput: 296.0331557134399 estimated_peak_memory_range: min: 16384 - max: 281702112 + max: 333843536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jp3jxmdzg + job_id: jp8qeoz8p job_status: Passed torchscript_onnx_qnn: - inference_time: 3716.0 - throughput: 269.1065662002153 + inference_time: 3696.0 + throughput: 270.56277056277054 estimated_peak_memory_range: - min: 274432 - max: 1532936 + min: 634880 + max: 1754680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jp4lmwzl5 + job_id: j5weloqj5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:48:37Z' + timestamp: '2024-12-12T00:19:00Z' - torchscript_onnx_tflite: - 
inference_time: 11150.0 - throughput: 89.68609865470852 + inference_time: 11123.0 + throughput: 89.90380293086398 estimated_peak_memory_range: - min: 49152 - max: 33832784 + min: 36864 + max: 35440432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jgo2ovxdp + job_id: jgke2z3og job_status: Passed torchscript_onnx_qnn: - inference_time: 9480.0 - throughput: 105.48523206751055 + inference_time: 10140.0 + throughput: 98.61932938856016 estimated_peak_memory_range: min: 651264 - max: 6599824 + max: 6411088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jpxk31w95 + job_id: jg9lzvwvg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:48:38Z' + timestamp: '2024-12-12T00:19:02Z' - torchscript_onnx_tflite: - inference_time: 3374.0 - throughput: 296.38411381149973 + inference_time: 3376.0 + throughput: 296.2085308056872 estimated_peak_memory_range: - min: 16384 - max: 302528104 + min: 20480 + max: 303540728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jpv6ew8m5 + job_id: j5q6l83mp job_status: Passed torchscript_onnx_qnn: - inference_time: 3715.0 - throughput: 269.17900403768505 + inference_time: 3641.0 + throughput: 274.64982147761606 estimated_peak_memory_range: - min: 626688 - max: 2045624 + min: 634880 + max: 1816504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: j5mnozjqp + job_id: jp14n0elp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: 
'2024-11-26T02:48:39Z' + timestamp: '2024-12-12T00:19:03Z' - torchscript_onnx_tflite: - inference_time: 5694.0 - throughput: 175.62346329469617 + inference_time: 5744.0 + throughput: 174.09470752089138 estimated_peak_memory_range: - min: 0 - max: 37110320 + min: 36864 + max: 38937008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jgjvol98g + job_id: jglvyn3l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6182.0 - throughput: 161.75994823681657 + inference_time: 6208.0 + throughput: 161.08247422680412 estimated_peak_memory_range: - min: 622592 - max: 6325696 + min: 663552 + max: 11230000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgn6oejm5 + job_id: jgdxdwolp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:48:40Z' + timestamp: '2024-12-12T00:19:04Z' - torchscript_onnx_tflite: - inference_time: 9806.0 - throughput: 101.97838058331634 + inference_time: 9819.0 + throughput: 101.84336490477645 estimated_peak_memory_range: - min: 20480 - max: 35227904 + min: 28672 + max: 37302496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jped8vq05 + job_id: j56y86n7p job_status: Passed torchscript_onnx_qnn: - inference_time: 10415.0 - throughput: 96.01536245799328 + inference_time: 10421.0 + throughput: 95.96008060646771 estimated_peak_memory_range: - min: 0 - max: 35684368 + min: 618496 + max: 35754224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jprvoyzeg + job_id: j57yezdr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 
+471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:48:41Z' + timestamp: '2024-12-12T00:19:05Z' - torchscript_onnx_qnn: - inference_time: 3888.0 - throughput: 257.201646090535 + inference_time: 3894.0 + throughput: 256.8053415511043 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 232 - job_id: jgdx8vklp + job_id: jped79k05 job_status: Passed torchscript_onnx: - inference_time: 16248.0 - throughput: 61.54603643525357 + inference_time: 16266.0 + throughput: 61.477929423337024 estimated_peak_memory_range: - min: 60030976 - max: 60030976 + min: 61538304 + max: 61538304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp8q67l8p + job_id: jpy1oed4p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:48:46Z' + timestamp: '2024-12-12T00:19:10Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 98198791..2c1d3e21 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -46,11 +46,11 @@ models: universal_assets: {} performance_metrics: - torchscript_onnx_qnn: - inference_time: 3449.0 - throughput: 289.93911278631487 + inference_time: 3429.0 + throughput: 291.6302128900554 estimated_peak_memory_range: - min: 12288 - max: 90226776 + min: 16384 + max: 142937624 primary_compute_unit: NPU precision: int8 layer_info: @@ -58,7 +58,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp3jxmlzg + job_id: jgke2zqog job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -67,13 +67,13 @@ models: 
os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:47:45Z' + timestamp: '2024-12-12T00:18:10Z' - torchscript_onnx_qnn: - inference_time: 2425.0 - throughput: 412.37113402061857 + inference_time: 2473.0 + throughput: 404.36716538617065 estimated_peak_memory_range: - min: 856064 - max: 36445472 + min: 0 + max: 38964256 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgo2ov7dp + job_id: j5q6l8rmp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -90,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:47:46Z' + timestamp: '2024-12-12T00:18:11Z' - torchscript_onnx_qnn: - inference_time: 2444.0 - throughput: 409.16530278232403 + inference_time: 2431.0 + throughput: 411.3533525298231 estimated_peak_memory_range: min: 311296 - max: 37305648 + max: 38133408 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,7 +104,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpv6ewym5 + job_id: jglvyn2l5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -113,13 +113,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:47:48Z' + timestamp: '2024-12-12T00:18:12Z' - torchscript_onnx_qnn: - inference_time: 13636.0 - throughput: 73.33528894103843 + inference_time: 13088.0 + throughput: 76.40586797066015 estimated_peak_memory_range: - min: 552960 - max: 8502640 + min: 315392 + max: 7666848 primary_compute_unit: NPU precision: int8 layer_info: @@ -127,7 +127,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgjvol68g + job_id: j56y86z7p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -136,13 +136,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:47:49Z' + 
timestamp: '2024-12-12T00:18:14Z' - torchscript_onnx_qnn: - inference_time: 3100.0 - throughput: 322.5806451612903 + inference_time: 3096.0 + throughput: 322.99741602067184 estimated_peak_memory_range: min: 335872 - max: 1919232 + max: 1897624 primary_compute_unit: NPU precision: int8 layer_info: @@ -150,7 +150,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jped8v005 + job_id: jp3jzk1zg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -159,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:47:50Z' + timestamp: '2024-12-12T00:18:15Z' - torchscript_onnx_qnn: - inference_time: 26853.0 - throughput: 37.23978698841842 + inference_time: 26821.0 + throughput: 37.284217590693856 estimated_peak_memory_range: - min: 720896 - max: 6493248 + min: 245760 + max: 10712800 primary_compute_unit: NPU precision: int8 layer_info: @@ -173,7 +173,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5we890j5 + job_id: jpv6l3rm5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -182,13 +182,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:47:52Z' + timestamp: '2024-12-12T00:18:17Z' - torchscript_onnx_qnn: - inference_time: 3110.0 - throughput: 321.54340836012864 + inference_time: 3095.0 + throughput: 323.10177705977384 estimated_peak_memory_range: - min: 368640 - max: 1705704 + min: 327680 + max: 1483480 primary_compute_unit: NPU precision: int8 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jg9lk47vg + job_id: jgjvrx28g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:47:53Z' + timestamp: '2024-12-12T00:18:19Z' - torchscript_onnx_qnn: - inference_time: 4817.0 - throughput: 207.59809009757112 + inference_time: 
4658.0 + throughput: 214.68441391155002 estimated_peak_memory_range: - min: 348160 - max: 6237664 + min: 319488 + max: 6375552 primary_compute_unit: NPU precision: int8 layer_info: @@ -219,7 +219,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp1478klp + job_id: jped79w05 job_status: Passed reference_device_info: name: SA8295P ADP @@ -228,13 +228,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:47:55Z' + timestamp: '2024-12-12T00:18:20Z' - torchscript_onnx_qnn: - inference_time: 3107.0 - throughput: 321.853878339234 + inference_time: 3108.0 + throughput: 321.75032175032175 estimated_peak_memory_range: - min: 20480 - max: 1354712 + min: 323584 + max: 1857472 primary_compute_unit: NPU precision: int8 layer_info: @@ -242,7 +242,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgdx8vylp + job_id: jgz3lej65 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -251,13 +251,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:47:56Z' + timestamp: '2024-12-12T00:18:21Z' - torchscript_onnx_qnn: - inference_time: 4469.0 - throughput: 223.76370552696352 + inference_time: 4446.0 + throughput: 224.9212775528565 estimated_peak_memory_range: - min: 319488 - max: 6176576 + min: 245760 + max: 5886336 primary_compute_unit: NPU precision: int8 layer_info: @@ -265,7 +265,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57ykdmr5 + job_id: j5welo3j5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -274,13 +274,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:47:57Z' + timestamp: '2024-12-12T00:18:23Z' - torchscript_onnx_qnn: - inference_time: 4227.0 - throughput: 236.5744026496333 + inference_time: 4258.0 + throughput: 234.85204321277595 estimated_peak_memory_range: - min: 331776 - max: 41456000 + min: 315392 + max: 39549152 
primary_compute_unit: NPU precision: int8 layer_info: @@ -288,7 +288,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp4lmw7l5 + job_id: jg9lzvyvg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -297,13 +297,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:47:58Z' + timestamp: '2024-12-12T00:18:24Z' - torchscript_onnx_qnn: - inference_time: 3383.0 - throughput: 295.5956251847473 + inference_time: 3380.0 + throughput: 295.85798816568047 estimated_peak_memory_range: - min: 311296 - max: 311296 + min: 344064 + max: 344064 primary_compute_unit: NPU precision: int8 layer_info: @@ -311,7 +311,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgz387q65 + job_id: jgo2lyndp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -320,4 +320,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:47:51Z' + timestamp: '2024-12-12T00:18:16Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py index 87373ea9..af6d4aa8 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py @@ -26,7 +26,6 @@ def main(): default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, supports_tflite=False, - supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py index 91b5ecb4..eff0287a 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py @@ -242,10 +242,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") parser = export_parser( - model_cls=Model, - supports_tflite=False, 
- supports_onnx=False, - is_hub_quantized=True, + model_cls=Model, supports_tflite=False, is_hub_quantized=True ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index 5f5fc5f8..1ff4b92d 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -46,14 +46,15 @@ aggregated: - SA8650P Proxy models: - name: ConvNext-Tiny-w8a8-Quantized - universal_assets: {} + universal_assets: + torchscript_onnx: mqe7x92km performance_metrics: - torchscript_onnx_qnn: - inference_time: 1756.0 - throughput: 569.4760820045558 + inference_time: 1760.0 + throughput: 568.1818181818181 estimated_peak_memory_range: min: 16384 - max: 295636808 + max: 295933096 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,7 +62,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5mnovkyp + job_id: jgdxdlxkp + job_status: Passed + torchscript_onnx: + inference_time: 33854.0 + throughput: 29.538606959295798 + estimated_peak_memory_range: + min: 10416128 + max: 16510976 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 45 + total_layers: 424 + job_id: j56y8l2vp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -70,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:08:00Z' + timestamp: '2024-12-12T01:37:06Z' - torchscript_onnx_qnn: - inference_time: 1227.0 - throughput: 814.9959250203749 + inference_time: 1225.0 + throughput: 816.3265306122449 estimated_peak_memory_range: - min: 163840 - max: 22155312 + min: 0 + max: 29153296 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +100,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgn6orwv5 + job_id: j57ye3yq5 + 
job_status: Passed + torchscript_onnx: + inference_time: 24323.0 + throughput: 41.113349504584136 + estimated_peak_memory_range: + min: 10969088 + max: 726297920 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 45 + total_layers: 424 + job_id: jp3jz2nxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -93,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:08:02Z' + timestamp: '2024-12-12T01:37:07Z' - torchscript_onnx_qnn: - inference_time: 1162.0 - throughput: 860.5851979345955 + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: - min: 159744 - max: 28367616 + min: 0 + max: 29804672 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +138,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jprvo17vg + job_id: jp4ly0lq5 + job_status: Passed + torchscript_onnx: + inference_time: 26578.0 + throughput: 37.62510346903454 + estimated_peak_memory_range: + min: 10399744 + max: 553019520 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 45 + total_layers: 424 + job_id: jgo2lqz4p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -116,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:08:04Z' + timestamp: '2024-12-12T01:37:09Z' - torchscript_onnx_qnn: - inference_time: 6659.0 - throughput: 150.1726986033939 + inference_time: 6519.0 + throughput: 153.39776039269827 estimated_peak_memory_range: - min: 163840 - max: 7665824 + min: 249856 + max: 7390576 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp2k43zxp + job_id: jpxkl2kj5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -139,13 
+185,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:08:06Z' + timestamp: '2024-12-12T01:36:49Z' - torchscript_onnx_qnn: - inference_time: 1683.0 - throughput: 594.1770647653001 + inference_time: 1692.0 + throughput: 591.016548463357 estimated_peak_memory_range: - min: 188416 - max: 1325552 + min: 184320 + max: 1326096 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +199,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpy1qvyrp + job_id: j5mn0ynyp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -162,13 +208,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:08:08Z' + timestamp: '2024-12-12T01:36:51Z' - torchscript_onnx_qnn: - inference_time: 15066.0 - throughput: 66.37461834594451 + inference_time: 15025.0 + throughput: 66.55574043261231 estimated_peak_memory_range: - min: 487424 - max: 6442128 + min: 118784 + max: 10481408 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +222,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp8q6wkzp + job_id: jprvlj6vg job_status: Passed reference_device_info: name: SA7255P ADP @@ -185,13 +231,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:08:11Z' + timestamp: '2024-12-12T01:36:55Z' - torchscript_onnx_qnn: - inference_time: 1699.0 - throughput: 588.5815185403178 + inference_time: 1702.0 + throughput: 587.5440658049354 estimated_peak_memory_range: - min: 192512 - max: 1804528 + min: 184320 + max: 1448960 primary_compute_unit: NPU precision: int8 layer_info: @@ -199,7 +245,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgkeorkyg + job_id: jp2krnxxp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -208,13 +254,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:08:13Z' + 
timestamp: '2024-12-12T01:36:57Z' - torchscript_onnx_qnn: - inference_time: 3071.0 - throughput: 325.626831650928 + inference_time: 2871.0 + throughput: 348.31069313827936 estimated_peak_memory_range: min: 0 - max: 5810624 + max: 5897568 primary_compute_unit: NPU precision: int8 layer_info: @@ -222,7 +268,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5q6z9d7p + job_id: jpy1o0zrp job_status: Passed reference_device_info: name: SA8295P ADP @@ -231,13 +277,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:08:15Z' + timestamp: '2024-12-12T01:36:58Z' - torchscript_onnx_qnn: - inference_time: 1692.0 - throughput: 591.016548463357 + inference_time: 1700.0 + throughput: 588.2352941176471 estimated_peak_memory_range: - min: 180224 - max: 1354128 + min: 184320 + max: 1428072 primary_compute_unit: NPU precision: int8 layer_info: @@ -245,7 +291,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jglvoeqe5 + job_id: jp0zm7425 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -254,13 +300,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:08:17Z' + timestamp: '2024-12-12T01:37:00Z' - torchscript_onnx_qnn: - inference_time: 2452.0 - throughput: 407.8303425774878 + inference_time: 2453.0 + throughput: 407.6640847941296 estimated_peak_memory_range: min: 0 - max: 5748640 + max: 5926608 primary_compute_unit: NPU precision: int8 layer_info: @@ -268,7 +314,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j56yrq0vp + job_id: jgke2mvyg job_status: Passed reference_device_info: name: SA8775P ADP @@ -277,13 +323,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:08:19Z' + timestamp: '2024-12-12T01:37:02Z' - torchscript_onnx_qnn: - inference_time: 2148.0 - throughput: 465.54934823091247 + inference_time: 2180.0 + throughput: 458.7155963302752 
estimated_peak_memory_range: - min: 163840 - max: 26658656 + min: 192512 + max: 31996192 primary_compute_unit: NPU precision: int8 layer_info: @@ -291,7 +337,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp3jxqrxg + job_id: j5q6lo07p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -300,13 +346,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:08:20Z' + timestamp: '2024-12-12T01:37:04Z' - torchscript_onnx_qnn: - inference_time: 1832.0 - throughput: 545.8515283842795 + inference_time: 1846.0 + throughput: 541.7118093174431 estimated_peak_memory_range: - min: 385024 - max: 385024 + min: 405504 + max: 405504 primary_compute_unit: NPU precision: int8 layer_info: @@ -314,7 +360,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp0zdex25 + job_id: jgn6z80v5 + job_status: Passed + torchscript_onnx: + inference_time: 35338.0 + throughput: 28.29814930103571 + estimated_peak_memory_range: + min: 31043584 + max: 31043584 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 379 + layers_on_gpu: 0 + layers_on_cpu: 45 + total_layers: 424 + job_id: jpv6lxq75 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -323,4 +384,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:08:10Z' + timestamp: '2024-12-12T01:37:11Z' diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index ed97c435..1ea78dbd 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml 
b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 9a70e1b5..7fb1be2a 100644 --- a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DDRNet23-Slim universal_assets: - torchscript_onnx_tflite: mnlvg6kjm - torchscript_onnx: mn41dlrrn + torchscript_onnx_tflite: mnz1v8kzq + torchscript_onnx: mnl6v8yon performance_metrics: - torchscript_onnx_tflite: - inference_time: 5087.0 - throughput: 196.57951641438962 + inference_time: 5172.0 + throughput: 193.34880123743233 estimated_peak_memory_range: - min: 987136 - max: 20630600 + min: 999424 + max: 17782472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j5q6z2d7p + job_id: jp0zmy325 job_status: Passed - torchscript_onnx: - inference_time: 7462.0 - throughput: 134.01232913428035 + torchscript_onnx_qnn: + inference_time: 5074.0 + throughput: 197.08316909735908 estimated_peak_memory_range: min: 9850880 - max: 26698472 + max: 26875992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jped79e75 + job_status: Passed + torchscript_onnx: + inference_time: 7424.0 + throughput: 134.69827586206895 + estimated_peak_memory_range: + min: 1687552 + max: 213931352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jp2k4mqmp + job_id: jp4lyqdl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:46:59Z' + timestamp: '2024-12-12T00:17:23Z' - torchscript_onnx_tflite: - inference_time: 3548.0 - throughput: 281.8489289740699 + inference_time: 3525.0 + throughput: 283.68794326241135 estimated_peak_memory_range: min: 12288 - max: 
32769792 + max: 36112352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jglvokqe5 + job_id: jp8qeo0zp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3554.0 + throughput: 281.37310073157005 + estimated_peak_memory_range: + min: 9846784 + max: 43104656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jgz3leoz5 job_status: Passed torchscript_onnx: - inference_time: 4952.0 - throughput: 201.93861066235866 + inference_time: 4951.0 + throughput: 201.97939810139366 estimated_peak_memory_range: - min: 11911168 - max: 107614416 + min: 11833344 + max: 109937712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jpy1qdk4p + job_id: jpxklv695 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:47:00Z' + timestamp: '2024-12-12T00:17:24Z' - torchscript_onnx_tflite: - inference_time: 2854.0 - throughput: 350.385423966363 + inference_time: 2857.0 + throughput: 350.01750087504377 estimated_peak_memory_range: min: 8192 - max: 29291280 + max: 29563616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j56yr10vp + job_id: jgke2z7yg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3596.0 + throughput: 278.08676307007784 + estimated_peak_memory_range: + min: 9846784 + max: 39730144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: j5welo2z5 job_status: Passed torchscript_onnx: - inference_time: 4110.0 - throughput: 243.30900243309003 + 
inference_time: 5026.0 + throughput: 198.96538002387584 estimated_peak_memory_range: min: 11894784 - max: 60637888 + max: 62687072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jp0zdr8e5 + job_id: j5mn0r6qp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:47:01Z' + timestamp: '2024-12-12T00:17:25Z' - torchscript_onnx_tflite: - inference_time: 5226.0 - throughput: 191.35093761959433 + inference_time: 5132.0 + throughput: 194.85580670303975 estimated_peak_memory_range: - min: 1015808 - max: 17741104 + min: 995328 + max: 23159320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jp3jxmrxg + job_id: j5q6l8e7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4622.0 + throughput: 216.3565556036348 + estimated_peak_memory_range: + min: 9879552 + max: 11079920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jg9lzvjqg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:46:37Z' + timestamp: '2024-12-12T00:17:14Z' - torchscript_onnx_tflite: - inference_time: 180307.0 - throughput: 5.546096380062893 + inference_time: 180360.0 + throughput: 5.544466622310933 estimated_peak_memory_range: - min: 1069056 - max: 28028768 + min: 1126400 + max: 28503504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgo2ov94p + job_id: jglvyn6e5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 179839.0 + 
throughput: 5.5605291399529575 + estimated_peak_memory_range: + min: 1372160 + max: 11935104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jgdxdwekp job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:46:38Z' + timestamp: '2024-12-12T00:17:16Z' - torchscript_onnx_tflite: - inference_time: 5107.0 - throughput: 195.8096729978461 + inference_time: 5178.0 + throughput: 193.12475859405177 estimated_peak_memory_range: - min: 987136 - max: 20345384 + min: 1007616 + max: 20677744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jpv6ewn75 + job_id: j56y86evp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4583.0 + throughput: 218.1976871045167 + estimated_peak_memory_range: + min: 9916416 + max: 11119856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: j5welo2j5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:46:40Z' + timestamp: '2024-12-12T00:17:18Z' - torchscript_onnx_tflite: - inference_time: 8933.0 - throughput: 111.94447554013209 + inference_time: 8925.0 + throughput: 112.04481792717087 estimated_peak_memory_range: min: 987136 - max: 22935312 + max: 27448480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgjvol87g + job_id: jp3jzkvxg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8344.0 + throughput: 119.84659635666347 + estimated_peak_memory_range: + min: 57344 + max: 6115392 + 
primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jg9lzvjvg job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:46:41Z' + timestamp: '2024-12-12T00:17:19Z' - torchscript_onnx_tflite: - inference_time: 5120.0 - throughput: 195.3125 + inference_time: 5078.0 + throughput: 196.92792437967705 estimated_peak_memory_range: - min: 1015808 - max: 20644832 + min: 999424 + max: 20612744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jped8vn75 + job_id: jgo2lyk4p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4583.0 + throughput: 218.1976871045167 + estimated_peak_memory_range: + min: 9904128 + max: 11211032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jp14n0ylp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:46:42Z' + timestamp: '2024-12-12T00:17:20Z' - torchscript_onnx_tflite: - inference_time: 10093.0 - throughput: 99.07856930545923 + inference_time: 10088.0 + throughput: 99.12767644726408 estimated_peak_memory_range: - min: 1003520 - max: 28574880 + min: 1011712 + max: 30606192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jgz3870z5 + job_id: jpv6l3075 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 9512.0 + throughput: 105.13036164844407 + estimated_peak_memory_range: + min: 57344 + max: 6151936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + 
layers_on_cpu: 0 + total_layers: 153 + job_id: jgdxdwelp job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:46:43Z' + timestamp: '2024-12-12T00:17:21Z' - torchscript_onnx_tflite: - inference_time: 7540.0 - throughput: 132.6259946949602 + inference_time: 7791.0 + throughput: 128.3532280836863 estimated_peak_memory_range: min: 987136 - max: 27327024 + max: 29256704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j5we89rz5 + job_id: jgjvrxz7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7663.0 + throughput: 130.49719431032233 + estimated_peak_memory_range: + min: 9125888 + max: 38757792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: j57yezlr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:46:45Z' - - torchscript_onnx: - inference_time: 8348.0 - throughput: 119.78917105893628 + timestamp: '2024-12-12T00:17:22Z' + - torchscript_onnx_qnn: + inference_time: 4978.0 + throughput: 200.88388911209321 + estimated_peak_memory_range: + min: 9854976 + max: 9854976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 153 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 153 + job_id: jp14n0ykp + job_status: Passed + torchscript_onnx: + inference_time: 8440.0 + throughput: 118.48341232227489 estimated_peak_memory_range: - min: 9863168 - max: 9863168 + min: 9859072 + max: 9859072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jp8q67d8p + job_id: jgn6z2mm5 job_status: Passed 
reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:47:02Z' + timestamp: '2024-12-12T00:17:27Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index c2c72cb8..ba55d691 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DeepLabV3-Plus-MobileNet universal_assets: - torchscript_onnx_tflite: mm5dweo6n - torchscript_onnx: mn0x9j7xn + torchscript_onnx_tflite: mqkvk971m + torchscript_onnx: mno63dopn performance_metrics: - torchscript_onnx_tflite: - inference_time: 13814.0 - throughput: 72.39032865209208 + inference_time: 13690.0 + throughput: 73.04601899196494 estimated_peak_memory_range: - min: 36864 - max: 15145504 + min: 315392 + max: 18233864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jprvoydvg + job_id: jp4lyqlq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14022.0 - throughput: 71.3165026387106 + inference_time: 14010.0 + throughput: 71.3775874375446 estimated_peak_memory_range: - min: 3293184 - max: 23780800 + min: 20480 + max: 17659368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2ovw4p + job_id: j5q6l807p job_status: Passed torchscript_onnx: - inference_time: 12104.0 - throughput: 82.61731658955718 + inference_time: 12153.0 + throughput: 82.28420966016621 estimated_peak_memory_range: - min: 36864 - max: 28380872 + min: 3293184 + max: 223934432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk31rj5 + job_id: jp14n02kp 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:46:10Z' + timestamp: '2024-12-12T00:16:36Z' - torchscript_onnx_tflite: - inference_time: 9519.0 - throughput: 105.05305179115453 + inference_time: 9530.0 + throughput: 104.9317943336831 estimated_peak_memory_range: - min: 311296 - max: 32640352 + min: 122880 + max: 34330576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jp2k4mdxp + job_id: jpxklvkj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 9696.0 - throughput: 103.13531353135313 + inference_time: 9690.0 + throughput: 103.19917440660474 estimated_peak_memory_range: - min: 3260416 - max: 37377664 + min: 3301376 + max: 39244304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv6ewm75 + job_id: jglvyn4e5 job_status: Passed torchscript_onnx: - inference_time: 8240.0 - throughput: 121.35922330097087 + inference_time: 8237.0 + throughput: 121.40342357654485 estimated_peak_memory_range: - min: 3809280 - max: 97404144 + min: 577536 + max: 94999344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mnozkyp + job_id: jgdxdwnkp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:46:11Z' + timestamp: '2024-12-12T00:16:37Z' - torchscript_onnx_tflite: - inference_time: 7813.0 - throughput: 127.99180852425445 + inference_time: 9401.0 + throughput: 106.37166258908627 estimated_peak_memory_range: min: 282624 - max: 31413504 + max: 32220528 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jpy1qd2rp + job_id: j5mn0rnyp job_status: Passed torchscript_onnx_qnn: - inference_time: 9494.0 - throughput: 105.32968190436065 + inference_time: 9493.0 + throughput: 105.34077741493732 estimated_peak_memory_range: - min: 3256320 - max: 34302368 + min: 1155072 + max: 33931344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgjvoly7g + job_id: j56y862vp job_status: Passed torchscript_onnx: - inference_time: 8339.0 - throughput: 119.9184554502938 + inference_time: 8377.0 + throughput: 119.3744777366599 estimated_peak_memory_range: - min: 3825664 - max: 54546032 + min: 3805184 + max: 57310496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6oewv5 + job_id: j57yez2q5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:46:12Z' + timestamp: '2024-12-12T00:16:38Z' - torchscript_onnx_tflite: - inference_time: 13698.0 - throughput: 73.00335815447511 + inference_time: 13659.0 + throughput: 73.21180174244088 estimated_peak_memory_range: - min: 307200 - max: 20380848 + min: 28672 + max: 16438816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jp0zdr925 + job_id: jgn6z20v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12285.0 - throughput: 81.4000814000814 + inference_time: 12315.0 + throughput: 81.20178643930167 estimated_peak_memory_range: - min: 3293184 - max: 4916496 + min: 3284992 + max: 4807792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: 
jped8vx75 + job_id: jp3jzknxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:46:01Z' + timestamp: '2024-12-12T00:16:27Z' - torchscript_onnx_tflite: - inference_time: 420904.0 - throughput: 2.375838671050881 + inference_time: 420854.0 + throughput: 2.3761209350511105 estimated_peak_memory_range: - min: 348160 - max: 28346640 + min: 40960 + max: 28789024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jp8q67rzp + job_id: jprvlk6vg job_status: Passed torchscript_onnx_qnn: - inference_time: 417138.0 - throughput: 2.3972881876021845 + inference_time: 417171.0 + throughput: 2.397098551912765 estimated_peak_memory_range: - min: 704512 - max: 6546848 + min: 1884160 + max: 12457488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we89zz5 + job_id: jpv6l3q75 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:46:03Z' + timestamp: '2024-12-12T00:16:29Z' - torchscript_onnx_tflite: - inference_time: 13691.0 - throughput: 73.04068366079906 + inference_time: 13748.0 + throughput: 72.73785277858597 estimated_peak_memory_range: - min: 286720 - max: 21015440 + min: 307200 + max: 17880480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jgkeoy0yg + job_id: jp2kr8xxp job_status: Passed torchscript_onnx_qnn: - inference_time: 12351.0 - throughput: 80.96510404015869 + inference_time: 12284.0 + throughput: 81.406707912732 estimated_peak_memory_range: - min: 3338240 - max: 4659064 + min: 3321856 + max: 5308080 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lk42qg + job_id: jgjvrxd7g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:46:04Z' + timestamp: '2024-12-12T00:16:30Z' - torchscript_onnx_tflite: - inference_time: 22690.0 - throughput: 44.072278536800354 + inference_time: 22775.0 + throughput: 43.90779363336992 estimated_peak_memory_range: - min: 307200 - max: 29447920 + min: 303104 + max: 33432000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: j5q6z217p + job_id: jpy1oezrp job_status: Passed torchscript_onnx_qnn: - inference_time: 21178.0 - throughput: 47.218811974690716 + inference_time: 21001.0 + throughput: 47.61678015332603 estimated_peak_memory_range: - min: 49152 - max: 5972096 + min: 57344 + max: 6103488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp14781kp + job_id: jped79o75 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:46:05Z' + timestamp: '2024-12-12T00:16:31Z' - torchscript_onnx_tflite: - inference_time: 13689.0 - throughput: 73.05135510263716 + inference_time: 13685.0 + throughput: 73.07270734380708 estimated_peak_memory_range: - min: 36864 - max: 16961864 + min: 315392 + max: 18092904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jglvok8e5 + job_id: jp0zmy425 job_status: Passed torchscript_onnx_qnn: - inference_time: 12297.0 - throughput: 81.3206473123526 + inference_time: 12280.0 + throughput: 81.43322475570032 
estimated_peak_memory_range: - min: 3293184 - max: 4576080 + min: 3289088 + max: 4696816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdx8v4kp + job_id: jgz3le2z5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:46:07Z' + timestamp: '2024-12-12T00:16:32Z' - torchscript_onnx_tflite: - inference_time: 24797.0 - throughput: 40.327458966810504 + inference_time: 24753.0 + throughput: 40.39914353815699 estimated_peak_memory_range: - min: 315392 - max: 29077280 + min: 327680 + max: 30852912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: j56yr1mvp + job_id: jp8qeo2zp job_status: Passed torchscript_onnx_qnn: - inference_time: 22276.0 - throughput: 44.8913629017777 + inference_time: 22311.0 + throughput: 44.8209403433284 estimated_peak_memory_range: - min: 1605632 - max: 7337392 + min: 2240512 + max: 8097872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j57ykdnq5 + job_id: j5welowz5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:46:08Z' + timestamp: '2024-12-12T00:16:34Z' - torchscript_onnx_tflite: - inference_time: 19683.0 - throughput: 50.80526342529086 + inference_time: 19633.0 + throughput: 50.93465084296847 estimated_peak_memory_range: - min: 311296 - max: 34766256 + min: 303104 + max: 36833248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jp3jxm7xg + job_id: jgke2zvyg job_status: Passed torchscript_onnx_qnn: - 
inference_time: 20525.0 - throughput: 48.721071863580995 + inference_time: 20831.0 + throughput: 48.005376602179446 estimated_peak_memory_range: - min: 3284992 - max: 37997824 + min: 3276800 + max: 39895264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp4lmw4q5 + job_id: jg9lzv0qg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:46:09Z' + timestamp: '2024-12-12T00:16:35Z' - torchscript_onnx_qnn: - inference_time: 13050.0 - throughput: 76.62835249042146 + inference_time: 13025.0 + throughput: 76.77543186180422 estimated_peak_memory_range: min: 3256320 max: 3256320 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgz387yz5 + job_id: jgo2lyz4p job_status: Passed torchscript_onnx: - inference_time: 13556.0 - throughput: 73.7680731779286 + inference_time: 13561.0 + throughput: 73.74087456677236 estimated_peak_memory_range: - min: 12525568 - max: 12525568 + min: 12464128 + max: 12464128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprvoy7vg + job_id: jp4lyqnq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:46:13Z' + timestamp: '2024-12-12T00:16:39Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py index ae04db87..525a9183 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=400, 
supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index 12d043a9..a3651ab5 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -237,7 +237,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index 9f20412a..fb786892 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: DeepLabV3-Plus-MobileNet-Quantized universal_assets: - torchscript_onnx_tflite: mnwe1gzrn + torchscript_onnx_tflite: mnw8ez0pn performance_metrics: - torchscript_onnx_tflite: - inference_time: 4171.0 - throughput: 239.7506593143131 + inference_time: 4165.0 + throughput: 240.09603841536614 estimated_peak_memory_range: - min: 12288 - max: 6755960 + min: 16384 + max: 12809712 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jpy1qvo0p + job_id: jp2kr6n6p job_status: Passed torchscript_onnx_qnn: - inference_time: 4745.0 - throughput: 210.7481559536354 + inference_time: 4764.0 + throughput: 209.90764063811923 estimated_peak_memory_range: - min: 20480 - max: 11582960 + min: 12288 + max: 15848488 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 127 - job_id: j5we8my45 + job_id: jped73m85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:07:02Z' + timestamp: '2024-12-12T01:35:22Z' - torchscript_onnx_tflite: - inference_time: 2986.0 - throughput: 334.8961821835231 + inference_time: 2993.0 + throughput: 334.1129301703976 estimated_peak_memory_range: min: 12288 - max: 38704096 + max: 41929392 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jp0zdeo05 + job_id: jpy1ow00p job_status: Passed torchscript_onnx_qnn: - inference_time: 3448.0 - throughput: 290.0232018561485 + inference_time: 3449.0 + throughput: 289.93911278631487 estimated_peak_memory_range: - min: 827392 - max: 34622768 + min: 0 + max: 38253168 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jg9lk9omg + job_id: jgz3lkd45 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:07:04Z' + timestamp: '2024-12-12T01:35:25Z' - torchscript_onnx_tflite: - inference_time: 2785.0 - throughput: 359.0664272890485 + inference_time: 2819.0 + throughput: 354.735721887194 estimated_peak_memory_range: min: 8192 - max: 32811088 + max: 36558272 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jp8q6wjqp + job_id: jp0zm7005 job_status: Passed torchscript_onnx_qnn: - inference_time: 3447.0 - throughput: 290.1073397156948 + inference_time: 3466.0 + throughput: 288.51702250432777 estimated_peak_memory_range: - min: 22941696 - max: 53541648 + min: 823296 + max: 35192400 
primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp147qonp + job_id: j5weln645 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:07:07Z' + timestamp: '2024-12-12T01:35:27Z' - torchscript_onnx_tflite: - inference_time: 17946.0 - throughput: 55.722723726735765 + inference_time: 18168.0 + throughput: 55.04183179216204 estimated_peak_memory_range: - min: 303104 - max: 43055888 + min: 307200 + max: 45062960 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jgkeor6vg + job_id: jp8qevyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 19682.0 - throughput: 50.8078447312265 + inference_time: 19691.0 + throughput: 50.78462241633233 estimated_peak_memory_range: - min: 892928 - max: 9134928 + min: 860160 + max: 7807584 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgdx8766p + job_id: jg9lzenmg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:06:41Z' + timestamp: '2024-12-12T01:35:30Z' - torchscript_onnx_tflite: - inference_time: 163773.0 - throughput: 6.106012590597961 + inference_time: 164857.0 + throughput: 6.0658631419957905 estimated_peak_memory_range: - min: 3928064 - max: 12259200 + min: 3653632 + max: 6689856 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: j5q6z94ep + job_id: jgke2mxvg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T04:06:16Z' + timestamp: '2024-12-12T01:35:04Z' - torchscript_onnx_tflite: - inference_time: 4168.0 - throughput: 239.9232245681382 + inference_time: 4194.0 + throughput: 238.43586075345732 estimated_peak_memory_range: min: 16384 - max: 17125096 + max: 11393056 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jglvoew25 + job_id: j5q6loqep job_status: Passed torchscript_onnx_qnn: - inference_time: 3894.0 - throughput: 256.8053415511043 + inference_time: 3948.0 + throughput: 253.29280648429585 estimated_peak_memory_range: - min: 843776 - max: 2703072 + min: 839680 + max: 2066360 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5we8myz5 + job_id: jp14nxznp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:06:43Z' + timestamp: '2024-12-12T01:35:32Z' - torchscript_onnx_tflite: - inference_time: 54915.0 - throughput: 18.209960848584174 + inference_time: 54904.0 + throughput: 18.213609208800815 estimated_peak_memory_range: - min: 348160 - max: 31567936 + min: 344064 + max: 32136800 primary_compute_unit: NPU precision: int8 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: j56yrqonp + job_id: jglvyrm25 job_status: Passed torchscript_onnx_qnn: - inference_time: 55251.0 - throughput: 18.09921992362129 + inference_time: 55303.0 + throughput: 18.08220168887764 estimated_peak_memory_range: - min: 921600 - max: 6777504 + min: 847872 + max: 11342480 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp147qokp + job_id: j57ye3rn5 job_status: 
Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:06:48Z' + timestamp: '2024-12-12T01:35:36Z' - torchscript_onnx_tflite: - inference_time: 4182.0 - throughput: 239.12003825920613 + inference_time: 4160.0 + throughput: 240.3846153846154 estimated_peak_memory_range: min: 12288 - max: 13129192 + max: 13021560 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jp3jxqomg + job_id: j56y8l4np job_status: Passed torchscript_onnx_qnn: - inference_time: 3910.0 - throughput: 255.7544757033248 + inference_time: 3937.0 + throughput: 254.00050800101602 estimated_peak_memory_range: min: 843776 - max: 2163544 + max: 2186384 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgdx876kp + job_id: jp4ly0r25 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:06:50Z' + timestamp: '2024-12-12T01:35:39Z' - torchscript_onnx_tflite: - inference_time: 6651.0 - throughput: 150.35333032626673 + inference_time: 6619.0 + throughput: 151.08022359873092 estimated_peak_memory_range: min: 286720 - max: 30515664 + max: 35488064 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jgo2oed1p + job_id: jp3jz20mg job_status: Passed torchscript_onnx_qnn: - inference_time: 6453.0 - throughput: 154.9666821633349 + inference_time: 6472.0 + throughput: 154.51174289245984 estimated_peak_memory_range: - min: 860160 - max: 6704160 + min: 888832 + max: 7046880 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - 
job_id: j57ykvoq5 + job_id: jpxkl2o85 job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:06:53Z' + timestamp: '2024-12-12T01:35:41Z' - torchscript_onnx_tflite: - inference_time: 4161.0 - throughput: 240.3268445085316 + inference_time: 4164.0 + throughput: 240.15369836695484 estimated_peak_memory_range: - min: 12288 - max: 15035808 + min: 16384 + max: 17036504 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jpv6ez2z5 + job_id: jgo2lq61p job_status: Passed torchscript_onnx_qnn: - inference_time: 3912.0 - throughput: 255.6237218813906 + inference_time: 3910.0 + throughput: 255.7544757033248 estimated_peak_memory_range: - min: 901120 - max: 2193848 + min: 843776 + max: 2118416 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp4lmjeq5 + job_id: j5mn0yx7p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:06:55Z' + timestamp: '2024-12-12T01:35:43Z' - torchscript_onnx_tflite: - inference_time: 5729.0 - throughput: 174.55053237912375 + inference_time: 5733.0 + throughput: 174.4287458573173 estimated_peak_memory_range: - min: 0 - max: 31728448 + min: 286720 + max: 34669040 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jgjvok31g + job_id: jpv6lxkz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5523.0 - throughput: 181.0610175629187 + inference_time: 5497.0 + throughput: 181.91740949608877 estimated_peak_memory_range: - min: 831488 - max: 6412512 + min: 884736 + max: 6825424 primary_compute_unit: NPU precision: int8 
layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk3e0j5 + job_id: jgn6z8vj5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:06:57Z' + timestamp: '2024-12-12T01:35:46Z' - torchscript_onnx_tflite: - inference_time: 4831.0 - throughput: 206.99648105982197 + inference_time: 5016.0 + throughput: 199.36204146730464 estimated_peak_memory_range: - min: 12288 - max: 38316336 + min: 286720 + max: 38241936 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jgz38vz45 + job_id: jgjvr4n1g job_status: Passed torchscript_onnx_qnn: - inference_time: 5602.0 - throughput: 178.5076758300607 + inference_time: 5510.0 + throughput: 181.48820326678765 estimated_peak_memory_range: min: 827392 - max: 36051360 + max: 37763104 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mnov9yp + job_id: jprvlj3kg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,10 +492,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:07:00Z' + timestamp: '2024-12-12T01:35:48Z' - torchscript_onnx_qnn: - inference_time: 4260.0 - throughput: 234.7417840375587 + inference_time: 4272.0 + throughput: 234.0823970037453 estimated_peak_memory_range: min: 815104 max: 815104 @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jg9lk9oqg + job_id: jgdxdl16p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:07:09Z' + timestamp: '2024-12-12T01:35:34Z' diff --git 
a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index 6bdc183a..f36d5234 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -22,7 +22,6 @@ aggregated: - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - - SA7255P ADP - SA8255 (Proxy) - SA8295P ADP - SA8650 (Proxy) @@ -37,21 +36,20 @@ aggregated: - SA8775P - QCS8450 Proxy - QCS8550 Proxy - - SA7255P - SA8255P Proxy - SA8295P - SA8650P Proxy models: - name: DeepLabV3-ResNet50 universal_assets: - torchscript_onnx_tflite: mnzv41woq + torchscript_onnx_tflite: mqe7x5w5m performance_metrics: - torchscript_onnx_tflite: - inference_time: 290866.0 - throughput: 3.4380092551209147 + inference_time: 293273.0 + throughput: 3.409792241358734 estimated_peak_memory_range: - min: 0 - max: 172482000 + min: 962560 + max: 149514576 primary_compute_unit: GPU precision: fp16 layer_info: @@ -59,7 +57,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jprvoyxvg + job_id: jpxklvoj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -68,13 +66,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:45:04Z' + timestamp: '2024-12-12T00:15:34Z' - torchscript_onnx_tflite: - inference_time: 206660.0 - throughput: 4.838865769863544 + inference_time: 213961.0 + throughput: 4.673748954248672 estimated_peak_memory_range: - min: 21938176 - max: 48618688 + min: 22913024 + max: 49561376 primary_compute_unit: GPU precision: fp16 layer_info: @@ -82,7 +80,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jp2k4moxp + job_id: j5mn0rxyp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -91,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:45:05Z' + timestamp: '2024-12-12T00:15:35Z' - torchscript_onnx_tflite: - 
inference_time: 216408.0 - throughput: 4.620901260581864 + inference_time: 216568.0 + throughput: 4.6174873480846665 estimated_peak_memory_range: - min: 12730368 - max: 29132224 + min: 139264 + max: 17276624 primary_compute_unit: GPU precision: fp16 layer_info: @@ -105,7 +103,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jpy1qd8rp + job_id: jgn6z26v5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -114,13 +112,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:45:06Z' + timestamp: '2024-12-12T00:15:36Z' - torchscript_onnx_tflite: - inference_time: 291878.0 - throughput: 3.4260889823830505 + inference_time: 292000.0 + throughput: 3.4246575342465753 estimated_peak_memory_range: - min: 0 - max: 148663184 + min: 2215936 + max: 151186664 primary_compute_unit: GPU precision: fp16 layer_info: @@ -128,7 +126,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jp0zdro25 + job_id: jprvlkvvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -137,36 +135,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:45:08Z' - - torchscript_onnx_tflite: - inference_time: 2151421.0 - throughput: 0.4648090727012519 - estimated_peak_memory_range: - min: 21700608 - max: 43721712 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 98 - layers_on_cpu: 2 - total_layers: 100 - job_id: jp8q67jzp - job_status: Passed - reference_device_info: + timestamp: '2024-12-12T00:15:37Z' + - reference_device_info: name: SA7255P ADP os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:45:09Z' + timestamp: '2024-12-12T00:15:38Z' - torchscript_onnx_tflite: - inference_time: 291325.0 - throughput: 3.432592465459538 + inference_time: 291631.0 + throughput: 3.4289907451539787 estimated_peak_memory_range: - 
min: 5947392 - max: 183175120 + min: 53248 + max: 311329520 primary_compute_unit: GPU precision: fp16 layer_info: @@ -174,7 +157,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jgkeoy6yg + job_id: jpy1oe1rp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -183,13 +166,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:45:10Z' + timestamp: '2024-12-12T00:15:39Z' - torchscript_onnx_tflite: - inference_time: 281323.0 - throughput: 3.554632930830398 + inference_time: 283483.0 + throughput: 3.527548389145028 estimated_peak_memory_range: - min: 6545408 - max: 27529184 + min: 23920640 + max: 45019104 primary_compute_unit: GPU precision: fp16 layer_info: @@ -197,7 +180,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: j5q6z247p + job_id: jp0zmyz25 job_status: Passed reference_device_info: name: SA8295P ADP @@ -206,13 +189,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:45:11Z' + timestamp: '2024-12-12T00:15:40Z' - torchscript_onnx_tflite: - inference_time: 291540.0 - throughput: 3.4300610550867807 + inference_time: 298254.0 + throughput: 3.352846902304747 estimated_peak_memory_range: - min: 0 - max: 169064816 + min: 200704 + max: 148608192 primary_compute_unit: GPU precision: fp16 layer_info: @@ -220,7 +203,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jglvokwe5 + job_id: jp8qeoqzp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -229,13 +212,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:45:12Z' + timestamp: '2024-12-12T00:15:41Z' - torchscript_onnx_tflite: - inference_time: 592859.0 - throughput: 1.6867417041826134 + inference_time: 593464.0 + throughput: 1.6850221748918215 estimated_peak_memory_range: - min: 23183360 - max: 45935216 + min: 23945216 + max: 44352608 primary_compute_unit: GPU 
precision: fp16 layer_info: @@ -243,7 +226,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: j56yr1ovp + job_id: j5q6l867p job_status: Passed reference_device_info: name: SA8775P ADP @@ -252,13 +235,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:45:14Z' + timestamp: '2024-12-12T00:15:42Z' - torchscript_onnx_tflite: - inference_time: 408225.0 - throughput: 2.4496294935391023 + inference_time: 414518.0 + throughput: 2.412440473031328 estimated_peak_memory_range: - min: 23994368 - max: 54013152 + min: 45056 + max: 31763600 primary_compute_unit: GPU precision: fp16 layer_info: @@ -266,7 +249,7 @@ models: layers_on_gpu: 98 layers_on_cpu: 2 total_layers: 100 - job_id: jp3jxmoxg + job_id: jglvynve5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -275,4 +258,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:45:15Z' + timestamp: '2024-12-12T00:15:43Z' diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 9838c88f..ae1c8eb2 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DenseNet-121 universal_assets: - torchscript_onnx_tflite: mqexp7eyn - torchscript_onnx: mn0x9j1xn + torchscript_onnx_tflite: mn1wz2v4m + torchscript_onnx: mmr36k42m performance_metrics: - torchscript_onnx_tflite: - inference_time: 1926.0 - throughput: 519.2107995846313 + inference_time: 1934.0 + throughput: 517.063081695967 estimated_peak_memory_range: - min: 20480 - max: 49482784 + min: 28672 + max: 49795080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jp1478jkp + job_id: j5weljnz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1996.0 - throughput: 501.00200400801606 + inference_time: 1986.0 
+ throughput: 503.5246727089627 estimated_peak_memory_range: - min: 20480 - max: 28153624 + min: 16384 + max: 17519496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jp0zdrm25 + job_id: jp2kr8yxp job_status: Passed torchscript_onnx: - inference_time: 1866.0 - throughput: 535.9056806002144 + inference_time: 1865.0 + throughput: 536.1930294906166 estimated_peak_memory_range: - min: 12288 - max: 18174560 + min: 16384 + max: 17923936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgz387lz5 + job_id: jgjvrxn7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:44:34Z' + timestamp: '2024-12-12T00:15:04Z' - torchscript_onnx_tflite: - inference_time: 1281.0 - throughput: 780.64012490242 + inference_time: 1291.0 + throughput: 774.5933384972889 estimated_peak_memory_range: min: 16384 - max: 22158272 + max: 24392992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jgdx8v3kp + job_id: jg9lz6eqg job_status: Passed torchscript_onnx_qnn: inference_time: 1334.0 throughput: 749.6251874062968 estimated_peak_memory_range: min: 618496 - max: 21409680 + max: 22139712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jp8q67ezp + job_id: jpy1oe3rp job_status: Passed torchscript_onnx: - inference_time: 1327.0 - throughput: 753.5795026375282 + inference_time: 1314.0 + throughput: 761.03500761035 estimated_peak_memory_range: min: 0 - max: 111195584 + max: 113280880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 374 - job_id: j5we89lz5 + job_id: jped79m75 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:44:35Z' + timestamp: '2024-12-12T00:15:05Z' - torchscript_onnx_tflite: - inference_time: 1248.0 - throughput: 801.2820512820513 + inference_time: 1232.0 + throughput: 811.6883116883117 estimated_peak_memory_range: min: 12288 - max: 20469952 + max: 23812080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: j57ykd4q5 + job_id: jp14nrxkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1064.0 - throughput: 939.8496240601504 + inference_time: 1295.0 + throughput: 772.2007722007722 estimated_peak_memory_range: - min: 614400 - max: 20018368 + min: 0 + max: 22418992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgkeoy2yg + job_id: jp0zmy025 job_status: Passed torchscript_onnx: - inference_time: 1299.0 - throughput: 769.8229407236336 + inference_time: 1063.0 + throughput: 940.7337723424271 estimated_peak_memory_range: min: 0 - max: 32390256 + max: 35154160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jg9lk4zqg + job_id: jgz3ledz5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:44:36Z' + timestamp: '2024-12-12T00:15:06Z' - torchscript_onnx_tflite: - inference_time: 1921.0 - throughput: 520.5622071837585 + inference_time: 1932.0 + throughput: 517.5983436853002 estimated_peak_memory_range: min: 16384 - max: 8442456 + max: 50254896 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jp4lmw1q5 + job_id: jgdxdjlkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1789.0 - throughput: 558.9714924538848 + inference_time: 1809.0 + throughput: 552.791597567717 estimated_peak_memory_range: min: 634880 - max: 1978400 + max: 1848808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j5q6z2l7p + job_id: jp8qeoyzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:44:24Z' + timestamp: '2024-12-12T00:14:54Z' - torchscript_onnx_tflite: - inference_time: 53464.0 - throughput: 18.70417477180907 + inference_time: 53422.0 + throughput: 18.718879862229045 estimated_peak_memory_range: - min: 98304 - max: 20046240 + min: 57344 + max: 20683584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jpxk314j5 + job_id: j57yeq3q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 53597.0 - throughput: 18.65776069556132 + inference_time: 53640.0 + throughput: 18.642803877703205 estimated_peak_memory_range: - min: 3162112 - max: 8724864 + min: 618496 + max: 11160736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j56yr18vp + job_id: j5q6l8q7p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:44:27Z' + timestamp: '2024-12-12T00:14:57Z' - torchscript_onnx_tflite: - inference_time: 1921.0 - throughput: 520.5622071837585 + inference_time: 1930.0 + throughput: 518.1347150259068 estimated_peak_memory_range: min: 16384 - 
max: 72216040 + max: 49963848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: j5mnozmyp + job_id: jp4lyz0q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1799.0 - throughput: 555.864369093941 + inference_time: 1798.0 + throughput: 556.1735261401557 estimated_peak_memory_range: - min: 634880 - max: 2005744 + min: 630784 + max: 1756936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jp3jxmzxg + job_id: jglvynme5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:44:28Z' + timestamp: '2024-12-12T00:14:58Z' - torchscript_onnx_tflite: - inference_time: 3319.0 - throughput: 301.29557095510694 + inference_time: 3306.0 + throughput: 302.48033877797945 estimated_peak_memory_range: min: 16384 - max: 18327872 + max: 18184672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jgn6oezv5 + job_id: jpxklw2j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3286.0 - throughput: 304.32136335970785 + inference_time: 3299.0 + throughput: 303.12215822976657 estimated_peak_memory_range: min: 0 - max: 5906720 + max: 6310624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgo2ovl4p + job_id: j56y864vp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:44:29Z' + timestamp: '2024-12-12T00:14:59Z' - torchscript_onnx_tflite: - inference_time: 1932.0 - throughput: 517.5983436853002 + inference_time: 1926.0 + throughput: 519.2107995846313 
estimated_peak_memory_range: - min: 20480 - max: 5937344 + min: 28672 + max: 49696648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jprvoylvg + job_id: j5mn0jyyp job_status: Passed torchscript_onnx_qnn: - inference_time: 1794.0 - throughput: 557.4136008918617 + inference_time: 1802.0 + throughput: 554.9389567147614 estimated_peak_memory_range: - min: 143360 - max: 1462128 + min: 634880 + max: 1890888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jpv6ewl75 + job_id: jp3jzk0xg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:44:31Z' + timestamp: '2024-12-12T00:15:01Z' - torchscript_onnx_tflite: - inference_time: 3637.0 - throughput: 274.95188342040143 + inference_time: 3624.0 + throughput: 275.9381898454746 estimated_peak_memory_range: min: 16384 - max: 19416592 + max: 23885744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jp2k4mrxp + job_id: jgn6z2vv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3571.0 - throughput: 280.0336040324839 + inference_time: 3588.0 + throughput: 278.70680044593087 estimated_peak_memory_range: - min: 622592 - max: 6322432 + min: 618496 + max: 6697184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jgjvolr7g + job_id: jgo2ly64p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:44:32Z' + timestamp: '2024-12-12T00:15:02Z' - torchscript_onnx_tflite: - inference_time: 2635.0 - throughput: 
379.5066413662239 + inference_time: 2636.0 + throughput: 379.3626707132018 estimated_peak_memory_range: min: 16384 - max: 24461184 + max: 23902768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jpy1qdorp + job_id: jprvlk3vg job_status: Passed torchscript_onnx_qnn: - inference_time: 2696.0 - throughput: 370.919881305638 + inference_time: 2682.0 + throughput: 372.85607755406414 estimated_peak_memory_range: - min: 0 - max: 23062192 + min: 638976 + max: 23705936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jped8v775 + job_id: jpv6l3k75 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:44:33Z' + timestamp: '2024-12-12T00:15:03Z' - torchscript_onnx_qnn: - inference_time: 2064.0 - throughput: 484.49612403100775 + inference_time: 2021.0 + throughput: 494.80455220188026 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,22 +485,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jglvokye5 - job_status: Passed - torchscript_onnx: - inference_time: 2004.0 - throughput: 499.001996007984 - estimated_peak_memory_range: - min: 17088512 - max: 17088512 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 374 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 374 - job_id: jp1478nkp + job_id: jgke2zxyg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +494,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:44:37Z' + timestamp: '2024-12-12T00:15:07Z' diff --git a/qai_hub_models/models/densenet121_quantized/info.yaml b/qai_hub_models/models/densenet121_quantized/info.yaml index 37aadad5..7260d65c 100644 --- 
a/qai_hub_models/models/densenet121_quantized/info.yaml +++ b/qai_hub_models/models/densenet121_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1608.06993 research_paper_title: Densely Connected Convolutional Networks license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/densenet121_quantized/perf.yaml b/qai_hub_models/models/densenet121_quantized/perf.yaml index d50eda0b..de03c509 100644 --- a/qai_hub_models/models/densenet121_quantized/perf.yaml +++ b/qai_hub_models/models/densenet121_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: DenseNet-121-Quantized universal_assets: - torchscript_onnx: mn41dv80n + torchscript_onnx: mngg1p4rn performance_metrics: - torchscript_onnx_qnn: - inference_time: 1760.0 - throughput: 568.1818181818181 + inference_time: 1774.0 + throughput: 563.6978579481398 estimated_peak_memory_range: min: 12288 - max: 295787304 + max: 295852624 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5q6z9lep + job_id: jgke2nmvg job_status: Passed torchscript_onnx: - inference_time: 32348.0 - throughput: 30.913812291331766 + inference_time: 34172.0 + throughput: 29.263724686878145 estimated_peak_memory_range: - min: 10444800 - max: 15022344 + min: 10473472 + max: 14839944 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 45 total_layers: 424 - job_id: jgdx87d6p + job_id: jp14nlxnp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:05:38Z' + timestamp: '2024-12-12T01:34:26Z' - torchscript_onnx_qnn: - inference_time: 1224.0 - throughput: 816.9934640522875 + inference_time: 1225.0 + throughput: 
816.3265306122449 estimated_peak_memory_range: - min: 0 - max: 26185392 + min: 163840 + max: 26327600 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jglvoey25 + job_id: j5q6lkoep job_status: Passed torchscript_onnx: - inference_time: 24977.0 - throughput: 40.0368338871762 + inference_time: 25130.0 + throughput: 39.793076004775166 estimated_peak_memory_range: - min: 11460608 - max: 724222960 + min: 11612160 + max: 723526352 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 45 total_layers: 424 - job_id: j57ykven5 + job_id: jgdxd9l6p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:05:40Z' + timestamp: '2024-12-12T01:34:28Z' - torchscript_onnx_qnn: - inference_time: 1170.0 - throughput: 854.7008547008547 + inference_time: 1201.0 + throughput: 832.6394671107411 estimated_peak_memory_range: min: 0 - max: 28212960 + max: 31314640 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j56yrq8np + job_id: jglvyzr25 job_status: Passed torchscript_onnx: - inference_time: 26198.0 - throughput: 38.170852736850144 + inference_time: 26629.0 + throughput: 37.553043674189794 estimated_peak_memory_range: - min: 10891264 - max: 553760080 + min: 12029952 + max: 555558992 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 45 total_layers: 424 - job_id: jp4lmjy25 + job_id: j57yew3n5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:05:42Z' + timestamp: '2024-12-12T01:34:30Z' - 
torchscript_onnx_qnn: - inference_time: 6682.0 - throughput: 149.655791679138 + inference_time: 6492.0 + throughput: 154.03573629081947 estimated_peak_memory_range: - min: 163840 - max: 8424544 + min: 176128 + max: 7530624 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,7 +179,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp3jxqzmg + job_id: j56y8jlnp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -188,13 +188,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:05:21Z' + timestamp: '2024-12-12T01:34:09Z' - torchscript_onnx_qnn: - inference_time: 1694.0 - throughput: 590.318772136954 + inference_time: 1697.0 + throughput: 589.2751915144372 estimated_peak_memory_range: - min: 176128 - max: 1330104 + min: 184320 + max: 1360432 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgo2oel1p + job_id: jp3jz32mg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:05:23Z' + timestamp: '2024-12-12T01:34:11Z' - torchscript_onnx_qnn: - inference_time: 15021.0 - throughput: 66.57346381732242 + inference_time: 15062.0 + throughput: 66.39224538573895 estimated_peak_memory_range: - min: 155648 - max: 5908800 + min: 98304 + max: 10600576 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,7 +225,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgjvokr1g + job_id: jpv6loxz5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -234,13 +234,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:05:27Z' + timestamp: '2024-12-12T01:34:14Z' - torchscript_onnx_qnn: - inference_time: 1694.0 - throughput: 590.318772136954 + inference_time: 1696.0 + throughput: 
589.622641509434 estimated_peak_memory_range: - min: 196608 - max: 1382024 + min: 184320 + max: 1434960 primary_compute_unit: NPU precision: int8 layer_info: @@ -248,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jped84785 + job_id: jgjvrm41g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -257,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:05:28Z' + timestamp: '2024-12-12T01:34:16Z' - torchscript_onnx_qnn: - inference_time: 3100.0 - throughput: 322.5806451612903 + inference_time: 2876.0 + throughput: 347.70514603616135 estimated_peak_memory_range: - min: 0 - max: 5813616 + min: 163840 + max: 6066976 primary_compute_unit: NPU precision: int8 layer_info: @@ -271,7 +271,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgz38vl45 + job_id: jped71385 job_status: Passed reference_device_info: name: SA8295P ADP @@ -280,13 +280,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:05:30Z' + timestamp: '2024-12-12T01:34:18Z' - torchscript_onnx_qnn: - inference_time: 1696.0 - throughput: 589.622641509434 + inference_time: 1698.0 + throughput: 588.9281507656066 estimated_peak_memory_range: - min: 176128 - max: 1906864 + min: 184320 + max: 1451352 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +294,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5we8ml45 + job_id: jgz3l9k45 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -303,13 +303,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:05:32Z' + timestamp: '2024-12-12T01:34:20Z' - torchscript_onnx_qnn: - inference_time: 2437.0 - throughput: 410.3405826836274 + inference_time: 2460.0 + throughput: 406.5040650406504 estimated_peak_memory_range: - min: 163840 - max: 5699424 + min: 155648 + max: 6106480 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -317,7 +317,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jg9lk9zmg + job_id: j5welvn45 job_status: Passed reference_device_info: name: SA8775P ADP @@ -326,13 +326,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:05:34Z' + timestamp: '2024-12-12T01:34:22Z' - torchscript_onnx_qnn: - inference_time: 2162.0 - throughput: 462.53469010175763 + inference_time: 2127.0 + throughput: 470.14574518100613 estimated_peak_memory_range: min: 163840 - max: 26591664 + max: 27828992 primary_compute_unit: NPU precision: int8 layer_info: @@ -340,7 +340,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp147qnnp + job_id: jg9lz1emg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -349,13 +349,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:05:36Z' + timestamp: '2024-12-12T01:34:24Z' - torchscript_onnx_qnn: - inference_time: 1851.0 - throughput: 540.2485143165857 + inference_time: 1849.0 + throughput: 540.8328826392644 estimated_peak_memory_range: - min: 438272 - max: 438272 + min: 446464 + max: 446464 primary_compute_unit: NPU precision: int8 layer_info: @@ -363,22 +363,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpv6ezlz5 + job_id: jgo2l0q1p job_status: Passed torchscript_onnx: - inference_time: 34379.0 - throughput: 29.087524360801652 + inference_time: 35708.0 + throughput: 28.004928867480675 estimated_peak_memory_range: - min: 30281728 - max: 30281728 - primary_compute_unit: NPU - precision: int8 + min: 35233792 + max: 35233792 + primary_compute_unit: CPU + precision: fp32 layer_info: - layers_on_npu: 379 + layers_on_npu: 9 layers_on_gpu: 0 layers_on_cpu: 45 - total_layers: 424 - job_id: jpxk3el85 + total_layers: 54 + job_id: jp4lyo025 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -387,4 +387,4 @@ models: os_name: 
Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:05:43Z' + timestamp: '2024-12-12T01:34:31Z' diff --git a/qai_hub_models/models/whisper_small_en/README.md b/qai_hub_models/models/depth_anything/README.md similarity index 53% rename from qai_hub_models/models/whisper_small_en/README.md rename to qai_hub_models/models/depth_anything/README.md index e0b6f4a0..4e2c1ab2 100644 --- a/qai_hub_models/models/whisper_small_en/README.md +++ b/qai_hub_models/models/depth_anything/README.md @@ -1,13 +1,13 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [Whisper-Small-En: Automatic speech recognition (ASR) model for English transcription as well as translation](https://aihub.qualcomm.com/models/whisper_small_en) +# [Depth-Anything: Deep Convolutional Neural Network model for depth estimation](https://aihub.qualcomm.com/models/depth_anything) -OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. +Depth Anything is designed for estimating depth at each point in an image. -This is based on the implementation of Whisper-Small-En found [here](https://github.com/openai/whisper/tree/main). This repository contains scripts for optimized on-device +This is based on the implementation of Depth-Anything found [here](https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything). 
This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance -accross various devices, can be found [here](https://aihub.qualcomm.com/models/whisper_small_en). +across various devices can be found [here](https://aihub.qualcomm.com/models/depth_anything). [Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. @@ -18,14 +18,14 @@ accross various devices, can be found [here](https://aihub.qualcomm.com/models/w Install the package via pip: ```bash -pip install "qai_hub_models[whisper_small_en]" +pip install "qai_hub_models[depth_anything]" ``` Once installed, run the following simple CLI demo: ```bash -python -m qai_hub_models.models.whisper_small_en.demo +python -m qai_hub_models.models.depth_anything.demo ``` More details on the CLI tool can be found with the `--help` option. See [demo.py](demo.py) for sample usage of the model including pre/post processing @@ -38,21 +38,21 @@ This repository contains export scripts that produce a model optimized for on-device deployment. This can be run as follows: ```bash -python -m qai_hub_models.models.whisper_small_en.export +python -m qai_hub_models.models.depth_anything.export ``` Additional options are documented with the `--help` option. Note that the above script requires access to Deployment instructions for Qualcomm® AI Hub. ## License -* The license for the original implementation of Whisper-Small-En can be found - [here](https://github.com/openai/whisper/blob/main/LICENSE). +* The license for the original implementation of Depth-Anything can be found + [here](https://github.com/huggingface/transformers/blob/main/LICENSE). 
* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References -* [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) -* [Source Model Implementation](https://github.com/openai/whisper/tree/main) +* [Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data](https://arxiv.org/abs/2401.10891) +* [Source Model Implementation](https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything) diff --git a/qai_hub_models/models/depth_anything/__init__.py b/qai_hub_models/models/depth_anything/__init__.py new file mode 100644 index 00000000..9bbb95ea --- /dev/null +++ b/qai_hub_models/models/depth_anything/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.depth_estimation.app import ( # noqa: F401 + DepthEstimationApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import DepthAnything as Model # noqa: F401 diff --git a/qai_hub_models/models/depth_anything/conftest.py b/qai_hub_models/models/depth_anything/conftest.py new file mode 100644 index 00000000..f0e85c79 --- /dev/null +++ b/qai_hub_models/models/depth_anything/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ +import inspect + +import pytest + +from qai_hub_models.models.depth_anything import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. +@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/depth_anything/demo.py b/qai_hub_models/models/depth_anything/demo.py new file mode 100644 index 00000000..874d69d9 --- /dev/null +++ b/qai_hub_models/models/depth_anything/demo.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from qai_hub_models.models._shared.depth_estimation.demo import depth_estimation_demo +from qai_hub_models.models.depth_anything.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + DepthAnything, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_input_image.jpg" +) + + +def main(is_test: bool = False): + depth_estimation_demo(DepthAnything, MODEL_ID, INPUT_IMAGE_ADDRESS, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/depth_anything/export.py b/qai_hub_models/models/depth_anything/export.py new file mode 100644 index 00000000..71ee31f2 --- /dev/null +++ b/qai_hub_models/models/depth_anything/export.py @@ -0,0 +1,215 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Optional, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.common import ExportResult, TargetRuntime +from qai_hub_models.models.depth_anything import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + + +def export_model( + device: Optional[str] = None, + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> ExportResult | list[str]: + """ + This function executes the following recipe: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format + 2. Compiles the model to an asset that can be run on device + 3. Profiles the model performance on a real device + 4. Inferences the model on sample inputs + 5. Downloads the model asset to the local directory + 6. Summarizes the results from profiling and inference + + Each of the last 4 steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `<cwd>/build/<model_name>`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A struct of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + """ + model_name = "depth_anything" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not device and not chipset: + raise ValueError("Device or Chipset must be provided.") + hub_device = hub.Device( + name=device or "", attributes=f"chipset:{chipset}" if chipset else None + ) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "depth_anything", + "Depth-Anything", + device or f"Device (Chipset {chipset})", + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # On-device perf improves with I/O in channel_last format for runtimes + # that execute natively in channel_last format. + use_channel_last_format = target_runtime.channel_last_native_execution + + # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + # 2. Compiles the model to an asset that can be run on device + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profiles the model performance on a real device + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Inferences the model on sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs( + input_spec, use_channel_last_format=use_channel_last_format + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=sample_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Downloads the model asset to the local directory + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / model_name)) + + # 6. Summarizes the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + sample_inputs = model.sample_inputs(use_channel_last_format=False) + torch_out = torch_inference( + model, sample_inputs, return_channel_last_output=use_channel_last_format + ) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + + print_inference_metrics( + inference_job, inference_result, torch_out, model.get_output_names() + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device) + + return ExportResult( + compile_job=compile_job, + inference_job=inference_job, + profile_job=profile_job, + ) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/depth_anything/info.yaml b/qai_hub_models/models/depth_anything/info.yaml new file mode 100644 index 
00000000..b713c4ce --- /dev/null +++ b/qai_hub_models/models/depth_anything/info.yaml @@ -0,0 +1,35 @@ +name: Depth-Anything +# id must match with the model dir name in qai_hub_models +id: depth_anything +status: public +headline: Deep Convolutional Neural Network model for depth estimation. +domain: Computer Vision +use_case: Depth Estimation +description: Depth Anything is designed for estimating depth at each point in an image. +tags: [] +research_paper: https://arxiv.org/abs/2401.10891 +research_paper_title: 'Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data' +license: https://github.com/huggingface/transformers/blob/main/LICENSE +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything +technical_details: + Model checkpoint: DepthAnything_Small + Input resolution: 518x518 + Number of parameters: 24.8M + Model size: 94 MB +applicable_scenarios: + - Anomaly Detection + - Inventory Management +related_models: + - midas + - midas_quantized +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: true +has_animated_banner: true +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/depth_anything/model.py b/qai_hub_models/models/depth_anything/model.py new file mode 100644 index 00000000..8c4a792f --- /dev/null +++ b/qai_hub_models/models/depth_anything/model.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import torch +from torch import nn +from transformers import AutoModelForDepthEstimation + +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.image_processing import normalize_image_torchvision + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 +DEFAULT_WEIGHTS = "LiheYoung/depth-anything-small-hf" + + +class DepthAnything(BaseModel): + """Exportable DepthAnything Depth Estimation, end-to-end.""" + + def __init__(self, model: nn.Module) -> None: + super().__init__() + self.model = model + + @classmethod + def from_pretrained(cls, ckpt: str = DEFAULT_WEIGHTS) -> DepthAnything: + """Load DepthAnything from a weightfile from Huggingface/Transfomers.""" + net = AutoModelForDepthEstimation.from_pretrained(ckpt) + return cls(net) + + def forward(self, image: torch.Tensor): + """ + Run DepthAnything on `image`, and produce a predicted depth. + + Parameters: + image: Pixel values pre-processed for encoder consumption. + Range: float[0, 1] + 3-channel Color Space: RGB + + Returns: + depth : Shape [batch, 1, 518, 518] + """ + image = normalize_image_torchvision(image) + out = self.model(image, return_dict=False) + return out[0].unsqueeze(1) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + height: int = 518, + width: int = 518, + ): + """ + Returns the input specification (name -> (shape, type). This can be + used to submit profiling job on Qualcomm AI Hub. 
+ """ + return {"image": ((batch_size, 3, height, width), "float32")} + + @staticmethod + def get_output_names() -> list[str]: + return ["depth"] + + @staticmethod + def get_channel_last_inputs() -> list[str]: + return ["image"] diff --git a/qai_hub_models/models/depth_anything/perf.yaml b/qai_hub_models/models/depth_anything/perf.yaml new file mode 100644 index 00000000..6b696138 --- /dev/null +++ b/qai_hub_models/models/depth_anything/perf.yaml @@ -0,0 +1,512 @@ +aggregated: + supported_devices: + - Snapdragon 8 Elite QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - SA8775P ADP + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA7255P ADP + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - SA8775P + - QCS8450 Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy +models: +- name: Depth-Anything + universal_assets: + torchscript_onnx_tflite: mnl6v833n + torchscript_onnx: mnw8e28pn + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 329103.0 + throughput: 3.0385623953595076 + estimated_peak_memory_range: + min: 135168 + max: 111890464 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jped7j185 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 367114.0 + throughput: 2.7239495088719035 + estimated_peak_memory_range: + 
min: 3268608 + max: 58924656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jgn6zjyj5 + job_status: Passed + torchscript_onnx: + inference_time: 231904.0 + throughput: 4.3121291568925075 + estimated_peak_memory_range: + min: 172032 + max: 66273696 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jgo2ljq1p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-12T00:14:16Z' + - torchscript_onnx_tflite: + inference_time: 251120.0 + throughput: 3.9821599235425293 + estimated_peak_memory_range: + min: 36864 + max: 261112784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jgz3l1945 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 287241.0 + throughput: 3.481397154305966 + estimated_peak_memory_range: + min: 0 + max: 271070528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jprvlzqkg + job_status: Passed + torchscript_onnx: + inference_time: 196175.0 + throughput: 5.097489486427934 + estimated_peak_memory_range: + min: 2150400 + max: 1038150656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jpv6ljxz5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-12T00:14:17Z' + - torchscript_onnx_tflite: + inference_time: 235480.0 + throughput: 4.246645150331238 + 
estimated_peak_memory_range: + min: 1212416 + max: 285058112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j5weljv45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 274953.0 + throughput: 3.63698523020298 + estimated_peak_memory_range: + min: 3321856 + max: 296243872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp2kr266p + job_status: Passed + torchscript_onnx: + inference_time: 155528.0 + throughput: 6.429710405843321 + estimated_peak_memory_range: + min: 0 + max: 544941152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jgjvrj41g + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-12T00:14:19Z' + - torchscript_onnx_tflite: + inference_time: 330925.0 + throughput: 3.0218327415577546 + estimated_peak_memory_range: + min: 36864 + max: 55586992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jg9lz61mg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 240606.0 + throughput: 4.156172331529555 + estimated_peak_memory_range: + min: 3731456 + max: 4961568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jpy1o9w0p + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-12T00:14:07Z' + - torchscript_onnx_tflite: + inference_time: 1138385.0 + throughput: 
0.8784374354897508 + estimated_peak_memory_range: + min: 106496 + max: 281768112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jp14nrlnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1009095.0 + throughput: 0.9909869734762337 + estimated_peak_memory_range: + min: 3219456 + max: 13355008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp8qelvqp + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-12T00:14:09Z' + - torchscript_onnx_tflite: + inference_time: 320338.0 + throughput: 3.1217027015215177 + estimated_peak_memory_range: + min: 61440 + max: 125065176 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jgdxdj96p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 228116.0 + throughput: 4.383734591172912 + estimated_peak_memory_range: + min: 7434240 + max: 8657480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jgke2jmvg + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-12T00:14:11Z' + - torchscript_onnx_tflite: + inference_time: 388239.0 + throughput: 2.575732989215406 + estimated_peak_memory_range: + min: 1200128 + max: 285278144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j57yeqwn5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 280573.0 + throughput: 
3.564134824092126 + estimated_peak_memory_range: + min: 2650112 + max: 8848944 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: j5q6ljoep + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-12T00:14:12Z' + - torchscript_onnx_tflite: + inference_time: 328502.0 + throughput: 3.0441214969771875 + estimated_peak_memory_range: + min: 1142784 + max: 87000744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jp4lyzo25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 228865.0 + throughput: 4.369388067201188 + estimated_peak_memory_range: + min: 3715072 + max: 5102568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jglvyjr25 + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-12T00:14:13Z' + - torchscript_onnx_tflite: + inference_time: 368799.0 + throughput: 2.7115040984384446 + estimated_peak_memory_range: + min: 1216512 + max: 282868512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jpxklwj85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 263829.0 + throughput: 3.7903338905124153 + estimated_peak_memory_range: + min: 1810432 + max: 12749984 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: j56y8klnp + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + 
form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-12T00:14:14Z' + - torchscript_onnx_tflite: + inference_time: 372597.0 + throughput: 2.6838648727713856 + estimated_peak_memory_range: + min: 1228800 + max: 272832800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j5mn0j27p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 430273.0 + throughput: 2.3241058583736374 + estimated_peak_memory_range: + min: 204800 + max: 289270768 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp3jzy2mg + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-12T00:14:15Z' + - torchscript_onnx_qnn: + inference_time: 212934.0 + throughput: 4.696290869471292 + estimated_peak_memory_range: + min: 3227648 + max: 3227648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp0zmn705 + job_status: Passed + torchscript_onnx: + inference_time: 272145.0 + throughput: 3.674511749251318 + estimated_peak_memory_range: + min: 66723840 + max: 66723840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jped7j385 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-12T00:14:20Z' diff --git a/qai_hub_models/models/depth_anything/requirements.txt b/qai_hub_models/models/depth_anything/requirements.txt new file mode 100644 index 00000000..3fde9380 --- /dev/null +++ 
b/qai_hub_models/models/depth_anything/requirements.txt @@ -0,0 +1 @@ +transformers==4.41.1 diff --git a/qai_hub_models/models/depth_anything/test.py b/qai_hub_models/models/depth_anything/test.py new file mode 100644 index 00000000..a9e64900 --- /dev/null +++ b/qai_hub_models/models/depth_anything/test.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +import pytest + +from qai_hub_models.models._shared.depth_estimation.app import DepthEstimationApp +from qai_hub_models.models.depth_anything.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models.depth_anything.demo import main as demo_main +from qai_hub_models.models.depth_anything.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + DepthAnything, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_output_image.png" +) + + +# Verify that the output from Torch is as expected. 
+@skip_clone_repo_check +def test_task(): + (_, _, height, width) = DepthAnything.get_input_spec()["image"][0] + app = DepthEstimationApp(DepthAnything.from_pretrained(), height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + (_, _, height, width) = DepthAnything.get_input_spec()["image"][0] + traced_model = DepthAnything.from_pretrained().convert_to_torchscript() + app = DepthEstimationApp(traced_model, height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/depth_anything_v2/README.md b/qai_hub_models/models/depth_anything_v2/README.md new file mode 100644 index 00000000..a8745ee2 --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/README.md @@ -0,0 +1,63 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Depth-Anything-V2: Deep Convolutional Neural Network model for depth estimation](https://aihub.qualcomm.com/models/depth_anything_v2) + +Depth Anything is designed for estimating depth at each point in an image. + +This is based on the implementation of Depth-Anything-V2 found [here](https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. 
More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/depth_anything_v2). + +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[depth_anything_v2]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.depth_anything_v2.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.depth_anything_v2.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + + +## License +* The license for the original implementation of Depth-Anything-V2 can be found + [here](https://github.com/huggingface/transformers/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + + +## References +* [Depth Anything V2](https://arxiv.org/abs/2406.09414) +* [Source Model Implementation](https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. 
+* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/depth_anything_v2/__init__.py b/qai_hub_models/models/depth_anything_v2/__init__.py new file mode 100644 index 00000000..74067e6a --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.depth_estimation.app import ( # noqa: F401 + DepthEstimationApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import DepthAnythingV2 as Model # noqa: F401 diff --git a/qai_hub_models/models/depth_anything_v2/conftest.py b/qai_hub_models/models/depth_anything_v2/conftest.py new file mode 100644 index 00000000..83068654 --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.depth_anything_v2 import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/depth_anything_v2/demo.py b/qai_hub_models/models/depth_anything_v2/demo.py new file mode 100644 index 00000000..4a46e8fb --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/demo.py @@ -0,0 +1,24 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from qai_hub_models.models._shared.depth_estimation.demo import depth_estimation_demo +from qai_hub_models.models.depth_anything_v2.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + DepthAnythingV2, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_input_image.jpg" +) + + +def main(is_test: bool = False): + depth_estimation_demo(DepthAnythingV2, MODEL_ID, INPUT_IMAGE_ADDRESS, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/depth_anything_v2/export.py b/qai_hub_models/models/depth_anything_v2/export.py new file mode 100644 index 00000000..2c6627af --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/export.py @@ -0,0 +1,215 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Optional, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.common import ExportResult, TargetRuntime +from qai_hub_models.models.depth_anything_v2 import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + + +def export_model( + device: Optional[str] = None, + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> ExportResult | list[str]: + """ + This function executes the following recipe: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format + 2. Compiles the model to an asset that can be run on device + 3. Profiles the model performance on a real device + 4. Inferences the model on sample inputs + 5. Downloads the model asset to the local directory + 6. Summarizes the results from profiling and inference + + Each of the last 4 steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A struct of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + """ + model_name = "depth_anything_v2" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not device and not chipset: + raise ValueError("Device or Chipset must be provided.") + hub_device = hub.Device( + name=device or "", attributes=f"chipset:{chipset}" if chipset else None + ) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "depth_anything_v2", + "Depth-Anything-V2", + device or f"Device (Chipset {chipset})", + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # On-device perf improves with I/O in channel_last format for runtimes + # that execute natively in channel_last format. + use_channel_last_format = target_runtime.channel_last_native_execution + + # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace(model.to("cpu"), make_torch_inputs(input_spec)) + + # 2. Compiles the model to an asset that can be run on device + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profiles the model performance on a real device + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Inferences the model on sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs( + input_spec, use_channel_last_format=use_channel_last_format + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=sample_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Downloads the model asset to the local directory + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / model_name)) + + # 6. Summarizes the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + sample_inputs = model.sample_inputs(use_channel_last_format=False) + torch_out = torch_inference( + model, sample_inputs, return_channel_last_output=use_channel_last_format + ) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + + print_inference_metrics( + inference_job, inference_result, torch_out, model.get_output_names() + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device) + + return ExportResult( + compile_job=compile_job, + inference_job=inference_job, + profile_job=profile_job, + ) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/depth_anything_v2/info.yaml b/qai_hub_models/models/depth_anything_v2/info.yaml new file mode 100644 index 
00000000..387dfe23 --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/info.yaml @@ -0,0 +1,35 @@ +name: Depth-Anything-V2 +# id must match with the model dir name in qai_hub_models +id: depth_anything_v2 +status: public +headline: Deep Convolutional Neural Network model for depth estimation. +domain: Computer Vision +use_case: Depth Estimation +description: Depth Anything is designed for estimating depth at each point in an image. +tags: [] +research_paper: https://arxiv.org/abs/2406.09414 +research_paper_title: 'Depth Anything V2' +license: https://github.com/huggingface/transformers/blob/main/LICENSE +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/huggingface/transformers/tree/main/src/transformers/models/depth_anything +technical_details: + Model checkpoint: DepthAnything_V2_Small + Input resolution: 518x518 + Number of parameters: 24.8M + Model size: 94 MB +applicable_scenarios: + - Anomaly Detection + - Inventory Management +related_models: + - midas + - midas_quantized +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: true +has_animated_banner: true +license_type: mit +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/depth_anything_v2/model.py b/qai_hub_models/models/depth_anything_v2/model.py new file mode 100644 index 00000000..57209bb2 --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/model.py @@ -0,0 +1,66 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import torch +from torch import nn +from transformers import AutoModelForDepthEstimation + +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.image_processing import normalize_image_torchvision + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 1 +DEFAULT_WEIGHTS = "depth-anything/Depth-Anything-V2-Small-hf" + + +class DepthAnythingV2(BaseModel): + """Exportable DepthAnythingV2 Depth Estimation, end-to-end.""" + + def __init__(self, model: nn.Module) -> None: + super().__init__() + self.model = model + + @classmethod + def from_pretrained(cls, ckpt: str = DEFAULT_WEIGHTS) -> DepthAnythingV2: + """Load DepthAnythingV2 from a weightfile from Huggingface/Transfomers.""" + net = AutoModelForDepthEstimation.from_pretrained(ckpt) + return cls(net) + + def forward(self, image: torch.Tensor): + """ + Run DepthAnythingV2 on `image`, and produce a predicted depth. + + Parameters: + image: Pixel values pre-processed for encoder consumption. + Range: float[0, 1] + 3-channel Color Space: RGB + + Returns: + depth : Shape [batch, 1, 518, 518] + """ + image = normalize_image_torchvision(image) + out = self.model(image, return_dict=False) + return out[0].unsqueeze(1) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + height: int = 518, + width: int = 518, + ): + """ + Returns the input specification (name -> (shape, type). This can be + used to submit profiling job on Qualcomm AI Hub. 
+ """ + return {"image": ((batch_size, 3, height, width), "float32")} + + @staticmethod + def get_output_names() -> list[str]: + return ["depth"] + + @staticmethod + def get_channel_last_inputs() -> list[str]: + return ["image"] diff --git a/qai_hub_models/models/depth_anything_v2/perf.yaml b/qai_hub_models/models/depth_anything_v2/perf.yaml new file mode 100644 index 00000000..a702f94f --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/perf.yaml @@ -0,0 +1,512 @@ +aggregated: + supported_devices: + - Snapdragon 8 Elite QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - SA8775P ADP + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA7255P ADP + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - SA8775P + - QCS8450 Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy +models: +- name: Depth-Anything-V2 + universal_assets: + torchscript_onnx_tflite: mqe7x57vm + torchscript_onnx: mq214r17m + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 329135.0 + throughput: 3.0382669725188753 + estimated_peak_memory_range: + min: 0 + max: 89882144 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j5q6lj8ep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 378932.0 + throughput: 2.638995914834324 + 
estimated_peak_memory_range: + min: 3284992 + max: 80947672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jg9lz6vmg + job_status: Passed + torchscript_onnx: + inference_time: 227297.0 + throughput: 4.399530130182097 + estimated_peak_memory_range: + min: 200704 + max: 65746736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jp0zmnq05 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-12T00:13:23Z' + - torchscript_onnx_tflite: + inference_time: 251124.0 + throughput: 3.9820964941622465 + estimated_peak_memory_range: + min: 16384 + max: 260057328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jglvyjn25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 287780.0 + throughput: 3.4748766418792134 + estimated_peak_memory_range: + min: 3235840 + max: 274300336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp14nr0np + job_status: Passed + torchscript_onnx: + inference_time: 187808.0 + throughput: 5.324586812063384 + estimated_peak_memory_range: + min: 0 + max: 1033638080 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jgke2jnvg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-12T00:13:24Z' + - torchscript_onnx_tflite: + inference_time: 236199.0 + throughput: 
4.233718178315742 + estimated_peak_memory_range: + min: 704512 + max: 283742752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j56y8k6np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 240939.0 + throughput: 4.150428116660233 + estimated_peak_memory_range: + min: 3330048 + max: 295823408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jgdxdjw6p + job_status: Passed + torchscript_onnx: + inference_time: 156392.0 + throughput: 6.394188961072178 + estimated_peak_memory_range: + min: 0 + max: 544524112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: j5q6ljkep + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-12T00:13:25Z' + - torchscript_onnx_tflite: + inference_time: 329447.0 + throughput: 3.035389607433062 + estimated_peak_memory_range: + min: 65536 + max: 49197752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jp3jzykmg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 240330.0 + throughput: 4.160945366787334 + estimated_peak_memory_range: + min: 3682304 + max: 5010088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: j57yeqzn5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-12T00:13:14Z' + - torchscript_onnx_tflite: + inference_time: 1138667.0 + 
throughput: 0.8782198834250927 + estimated_peak_memory_range: + min: 81920 + max: 280528368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jgo2ljy1p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1011986.0 + throughput: 0.9881559626318941 + estimated_peak_memory_range: + min: 2486272 + max: 12775264 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jpxklwv85 + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-12T00:13:16Z' + - torchscript_onnx_tflite: + inference_time: 331644.0 + throughput: 3.0152814463702042 + estimated_peak_memory_range: + min: 36864 + max: 50044752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jpv6lj3z5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 231243.0 + throughput: 4.32445522675281 + estimated_peak_memory_range: + min: 3674112 + max: 5112992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: j5mn0jr7p + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-12T00:13:17Z' + - torchscript_onnx_tflite: + inference_time: 388927.0 + throughput: 2.5711765961221515 + estimated_peak_memory_range: + min: 1204224 + max: 287280608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jgjvrjx1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 280479.0 + 
throughput: 3.56532931164187 + estimated_peak_memory_range: + min: 6172672 + max: 12390384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jgn6zj2j5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-12T00:13:18Z' + - torchscript_onnx_tflite: + inference_time: 329235.0 + throughput: 3.0373441462784943 + estimated_peak_memory_range: + min: 1204224 + max: 59196800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jped7j985 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 229608.0 + throughput: 4.355248946029755 + estimated_peak_memory_range: + min: 3735552 + max: 5120024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jprvlzkkg + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-12T00:13:20Z' + - torchscript_onnx_tflite: + inference_time: 368900.0 + throughput: 2.7107617240444566 + estimated_peak_memory_range: + min: 1122304 + max: 282491344 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: jgz3l1e45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 264090.0 + throughput: 3.7865879056382292 + estimated_peak_memory_range: + min: 2830336 + max: 13807536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp2kr286p + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' 
+ form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-12T00:13:21Z' + - torchscript_onnx_tflite: + inference_time: 361044.0 + throughput: 2.76974551578201 + estimated_peak_memory_range: + min: 1220608 + max: 273707104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 635 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 635 + job_id: j5weljo45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 439422.0 + throughput: 2.275716736986314 + estimated_peak_memory_range: + min: 2297856 + max: 291186496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jpy1o9e0p + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-12T00:13:22Z' + - torchscript_onnx_qnn: + inference_time: 221375.0 + throughput: 4.517221908526256 + estimated_peak_memory_range: + min: 3227648 + max: 3227648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 494 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 494 + job_id: jp4lyzq25 + job_status: Passed + torchscript_onnx: + inference_time: 272662.0 + throughput: 3.6675444323007973 + estimated_peak_memory_range: + min: 65101824 + max: 65101824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 447 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 447 + job_id: jglvyjz25 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-12T00:13:26Z' diff --git a/qai_hub_models/models/depth_anything_v2/requirements.txt b/qai_hub_models/models/depth_anything_v2/requirements.txt new file mode 100644 index 00000000..3fde9380 --- /dev/null 
+++ b/qai_hub_models/models/depth_anything_v2/requirements.txt @@ -0,0 +1 @@ +transformers==4.41.1 diff --git a/qai_hub_models/models/depth_anything_v2/test.py b/qai_hub_models/models/depth_anything_v2/test.py new file mode 100644 index 00000000..13fb9bc8 --- /dev/null +++ b/qai_hub_models/models/depth_anything_v2/test.py @@ -0,0 +1,55 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +import pytest + +from qai_hub_models.models._shared.depth_estimation.app import DepthEstimationApp +from qai_hub_models.models.depth_anything_v2.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models.depth_anything_v2.demo import main as demo_main +from qai_hub_models.models.depth_anything_v2.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + DepthAnythingV2, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image +from qai_hub_models.utils.testing import skip_clone_repo_check + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_output_image.png" +) + + +# Verify that the output from Torch is as expected. 
+@skip_clone_repo_check +def test_task(): + (_, _, height, width) = DepthAnythingV2.get_input_spec()["image"][0] + app = DepthEstimationApp(DepthAnythingV2.from_pretrained(), height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@pytest.mark.trace +@skip_clone_repo_check +def test_trace(): + (_, _, height, width) = DepthAnythingV2.get_input_spec()["image"][0] + traced_model = DepthAnythingV2.from_pretrained().convert_to_torchscript() + app = DepthEstimationApp(traced_model, height, width) + original_image = load_image(INPUT_IMAGE_ADDRESS) + output_image = app.estimate_depth(original_image) + output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) + + np.testing.assert_allclose( + np.asarray(output_image), np.asarray(output_image_oracle), atol=3 + ) + + +@skip_clone_repo_check +def test_demo(): + demo_main(is_test=True) diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index e5f1a9f7..58513bdd 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index 41c9916f..9f91520a 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DETR-ResNet101 universal_assets: - torchscript_onnx_tflite: mm5dwep6n - torchscript_onnx: mnwe18rrn + torchscript_onnx_tflite: mqkvk9gkm + 
torchscript_onnx: mq214r4wm performance_metrics: - torchscript_onnx_tflite: - inference_time: 15282.0 - throughput: 65.43646119617851 + inference_time: 15193.0 + throughput: 65.81978542749951 estimated_peak_memory_range: - min: 77824 - max: 31314288 + min: 49152 + max: 33377376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jgz387n45 + job_id: jprvlzrkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 16162.0 + throughput: 61.873530503650535 + estimated_peak_memory_range: + min: 2785280 + max: 33143592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgo2ljj1p job_status: Passed torchscript_onnx: - inference_time: 16595.0 - throughput: 60.25911419102139 + inference_time: 16539.0 + throughput: 60.46314771146986 estimated_peak_memory_range: - min: 45056 - max: 133710784 + min: 49152 + max: 133688240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jgjvol71g + job_id: jpxklww85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:43:47Z' + timestamp: '2024-12-12T00:12:29Z' - torchscript_onnx_tflite: - inference_time: 10948.0 - throughput: 91.34088417975886 + inference_time: 10965.0 + throughput: 91.19927040583676 estimated_peak_memory_range: - min: 19607552 - max: 102646992 + min: 69632 + max: 88611712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j5we89445 + job_id: jp2kr216p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11502.0 + throughput: 86.9414014953921 + estimated_peak_memory_range: + min: 2785280 + max: 85858768 
+ primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jpv6ljjz5 job_status: Passed torchscript_onnx: - inference_time: 12246.0 - throughput: 81.65931732810714 + inference_time: 12154.0 + throughput: 82.27743952608195 estimated_peak_memory_range: - min: 2904064 - max: 281125216 + min: 2924544 + max: 279912544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jped8vz85 + job_id: j5mn0jj7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:43:49Z' + timestamp: '2024-12-12T00:12:30Z' - torchscript_onnx_tflite: - inference_time: 8886.0 - throughput: 112.53657438667567 + inference_time: 10663.0 + throughput: 93.78223764419019 estimated_peak_memory_range: - min: 53248 - max: 87259840 + min: 49152 + max: 88602416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jg9lk4dmg + job_id: jpy1o9l0p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11205.0 + throughput: 89.2458723784025 + estimated_peak_memory_range: + min: 2777088 + max: 85841664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgjvrjj1g job_status: Passed torchscript_onnx: - inference_time: 10006.0 - throughput: 99.94003597841295 + inference_time: 9973.0 + throughput: 100.2707309736288 estimated_peak_memory_range: min: 2863104 - max: 117055328 + max: 117992368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jgz387m45 + job_id: jgn6zjjj5 job_status: Passed reference_device_info: name: 
Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:43:50Z' + timestamp: '2024-12-12T00:12:31Z' - torchscript_onnx_tflite: - inference_time: 15178.0 - throughput: 65.88483331137172 + inference_time: 15224.0 + throughput: 65.68575932737782 estimated_peak_memory_range: - min: 73728 - max: 26169432 + min: 20480 + max: 31982736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jgdx8v26p + job_id: jp0zmnn05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15021.0 + throughput: 66.57346381732242 + estimated_peak_memory_range: + min: 2818048 + max: 3999224 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jped7jj85 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:43:25Z' + timestamp: '2024-12-12T00:12:20Z' - torchscript_onnx_tflite: - inference_time: 544833.0 - throughput: 1.8354247998928113 + inference_time: 544796.0 + throughput: 1.8355494533733727 estimated_peak_memory_range: - min: 16384 - max: 87759264 + min: 147456 + max: 87813072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j57ykd9n5 + job_id: jp8qellqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 545791.0 + throughput: 1.8322031693450422 + estimated_peak_memory_range: + min: 2363392 + max: 12617488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: j5weljj45 job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:43:26Z' + timestamp: '2024-12-12T00:12:22Z' - torchscript_onnx_tflite: - inference_time: 15401.0 - throughput: 64.9308486461918 + inference_time: 15305.0 + throughput: 65.33812479581836 estimated_peak_memory_range: - min: 49152 - max: 25431080 + min: 90112 + max: 26527320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jp4lmw325 + job_id: jgke2jjvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15011.0 + throughput: 66.61781360335753 + estimated_peak_memory_range: + min: 5672960 + max: 6889296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jg9lz66mg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:43:28Z' + timestamp: '2024-12-12T00:12:23Z' - torchscript_onnx_tflite: - inference_time: 23978.0 - throughput: 41.70489615480857 + inference_time: 23967.0 + throughput: 41.7240372178412 estimated_peak_memory_range: - min: 77824 - max: 53892288 + min: 73728 + max: 54645232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jpxk31x85 + job_id: j5q6ljjep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 24409.0 + throughput: 40.96849522717031 + estimated_peak_memory_range: + min: 81920 + max: 6169840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jp14nrrnp job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:43:29Z' + timestamp: 
'2024-12-12T00:12:25Z' - torchscript_onnx_tflite: - inference_time: 15180.0 - throughput: 65.87615283267458 + inference_time: 15274.0 + throughput: 65.470734581642 estimated_peak_memory_range: - min: 45056 - max: 26098672 + min: 57344 + max: 33703696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j5mnoz87p + job_id: jglvyjj25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14953.0 + throughput: 66.87621213134489 + estimated_peak_memory_range: + min: 6148096 + max: 7862360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgdxdjj6p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:43:30Z' + timestamp: '2024-12-12T00:12:26Z' - torchscript_onnx_tflite: - inference_time: 27867.0 - throughput: 35.88473822083468 + inference_time: 27835.0 + throughput: 35.92599245554158 estimated_peak_memory_range: - min: 32768 - max: 84662320 + min: 77824 + max: 87665264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jgn6oekj5 + job_id: j56y8kknp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 27293.0 + throughput: 36.639431356025355 + estimated_peak_memory_range: + min: 2383872 + max: 8163568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: j57yeqqn5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:43:31Z' + timestamp: '2024-12-12T00:12:27Z' - torchscript_onnx_tflite: - inference_time: 21134.0 - throughput: 
47.31711933377496 + inference_time: 21048.0 + throughput: 47.51045229950589 estimated_peak_memory_range: - min: 77824 - max: 53453056 + min: 0 + max: 57247392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jprvoywkg + job_id: jp3jzyymg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 22235.0 + throughput: 44.974139869575 + estimated_peak_memory_range: + min: 2781184 + max: 58839728 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jp4lyzz25 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:43:32Z' - - torchscript_onnx: - inference_time: 18679.0 - throughput: 53.5360565340757 + timestamp: '2024-12-12T00:12:28Z' + - torchscript_onnx_qnn: + inference_time: 15234.0 + throughput: 65.64264145989235 + estimated_peak_memory_range: + min: 2768896 + max: 2768896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgz3l1145 + job_status: Passed + torchscript_onnx: + inference_time: 18641.0 + throughput: 53.64519070865297 estimated_peak_memory_range: - min: 121516032 - max: 121516032 + min: 121597952 + max: 121597952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: j5we89745 + job_id: jprvlzzkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:43:51Z' + timestamp: '2024-12-12T00:12:32Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py 
index 9bf3d8b6..30c9ea8a 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index f79193f7..19ff3a81 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DETR-ResNet101-DC5 universal_assets: - torchscript_onnx_tflite: mmx7ze9rq - torchscript_onnx: mn1z8w7vm + torchscript_onnx_tflite: mqkvk9lzm + torchscript_onnx: mqv6470xm performance_metrics: - torchscript_onnx_tflite: - inference_time: 92281.0 - throughput: 10.836466878338987 + inference_time: 92657.0 + throughput: 10.792492742048632 estimated_peak_memory_range: - min: 57344 - max: 56761104 + min: 94208 + max: 62798896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jp3jxmwmg + job_id: jpxklw8l5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 92263.0 + throughput: 10.838581012973782 + estimated_peak_memory_range: + min: 65536 + max: 56243264 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jglvyjdm5 job_status: Passed torchscript_onnx: - inference_time: 67799.0 - throughput: 14.749480080827151 + inference_time: 67908.0 + throughput: 14.725805501560936 estimated_peak_memory_range: - min: 77824 - max: 134127528 + min: 131072 + max: 134618336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - 
job_id: j5q6z2vep + job_id: j5weljk45 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:42:50Z' + timestamp: '2024-12-12T00:11:30Z' - torchscript_onnx_tflite: - inference_time: 65059.0 - throughput: 15.37066355154552 + inference_time: 65718.0 + throughput: 15.216531239538634 estimated_peak_memory_range: - min: 73728 - max: 171461552 + min: 90112 + max: 168898560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgo2ov41p + job_id: j5mn0j19p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 66408.0 + throughput: 15.058426695578845 + estimated_peak_memory_range: + min: 2801664 + max: 173731584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: j56y8kxyp job_status: Passed torchscript_onnx: - inference_time: 56421.0 - throughput: 17.723897130501054 + inference_time: 56611.0 + throughput: 17.66441151013054 estimated_peak_memory_range: - min: 2662400 - max: 563628864 + min: 3026944 + max: 560515856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jglvokl25 + job_id: jg9lz6rmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:42:52Z' + timestamp: '2024-12-12T00:11:32Z' - torchscript_onnx_tflite: - inference_time: 61473.0 - throughput: 16.26730434499699 + inference_time: 61640.0 + throughput: 16.223231667748216 estimated_peak_memory_range: - min: 102400 - max: 182591008 + min: 12288 + max: 179607776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 857 - job_id: jpv6ew9z5 + job_id: jgn6zjdq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 63111.0 + throughput: 15.845098318835069 + estimated_peak_memory_range: + min: 2830336 + max: 183733680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jp3jzydng job_status: Passed torchscript_onnx: - inference_time: 50613.0 - throughput: 19.757769742951417 + inference_time: 50394.0 + throughput: 19.84363217843394 estimated_peak_memory_range: - min: 2928640 - max: 313542448 + min: 2949120 + max: 314321584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: j56yr1wnp + job_id: jp14nr9np job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:42:53Z' + timestamp: '2024-12-12T00:11:33Z' - torchscript_onnx_tflite: - inference_time: 81984.0 - throughput: 12.197501951600312 + inference_time: 92269.0 + throughput: 10.837876209777932 estimated_peak_memory_range: - min: 61440 - max: 52228808 + min: 172032 + max: 52022064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgjvolw1g + job_id: jprvlzm7g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 95581.0 + throughput: 10.462330379468723 + estimated_peak_memory_range: + min: 3117056 + max: 4466152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgo2ljxkp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:42:28Z' + 
timestamp: '2024-12-12T00:11:21Z' - torchscript_onnx_tflite: - inference_time: 873120.0 - throughput: 1.1453179402602163 + inference_time: 872285.0 + throughput: 1.1464143026648401 estimated_peak_memory_range: min: 221184 - max: 181481696 + max: 181466176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jped8vl85 + job_id: jp2kr2qqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 869525.0 + throughput: 1.1500531899600357 + estimated_peak_memory_range: + min: 2707456 + max: 12866848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgjvrj9eg job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:42:29Z' + timestamp: '2024-12-12T00:11:23Z' - torchscript_onnx_tflite: - inference_time: 89776.0 - throughput: 11.138834432364998 + inference_time: 93383.0 + throughput: 10.708587216088581 estimated_peak_memory_range: - min: 147456 - max: 50513920 + min: 94208 + max: 51183256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgz387445 + job_id: jpy1o9klp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 101829.0 + throughput: 9.820385155505798 + estimated_peak_memory_range: + min: 3256320 + max: 4535888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jped7jqv5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:42:30Z' + timestamp: '2024-12-12T00:11:25Z' - torchscript_onnx_tflite: - inference_time: 102904.0 - 
throughput: 9.717795226618986 + inference_time: 103091.0 + throughput: 9.700167812903164 estimated_peak_memory_range: - min: 147456 - max: 150394208 + min: 16384 + max: 150177344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: j5we89145 + job_id: jp0zmn8n5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 83256.0 + throughput: 12.011146343807052 + estimated_peak_memory_range: + min: 2781184 + max: 8878704 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgz3l16x5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:42:32Z' + timestamp: '2024-12-12T00:11:26Z' - torchscript_onnx_tflite: - inference_time: 91594.0 - throughput: 10.917745703867066 + inference_time: 83102.0 + throughput: 12.033404731534741 estimated_peak_memory_range: - min: 172032 - max: 53748176 + min: 20480 + max: 53367792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jg9lk4xmg + job_id: jp8qeldop + job_status: Passed + torchscript_onnx_qnn: + inference_time: 102125.0 + throughput: 9.791921664626683 + estimated_peak_memory_range: + min: 3026944 + max: 4273768 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: j5weljkm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:42:33Z' + timestamp: '2024-12-12T00:11:27Z' - torchscript_onnx_tflite: - inference_time: 108126.0 - throughput: 9.248469378317889 + inference_time: 108469.0 + throughput: 
9.219223925729931 estimated_peak_memory_range: - min: 159744 - max: 180988416 + min: 135168 + max: 180988736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jp1478vnp + job_id: jgke2jwng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 107146.0 + throughput: 9.333059563586135 + estimated_peak_memory_range: + min: 1556480 + max: 12307088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jp14nr97p job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:42:34Z' + timestamp: '2024-12-12T00:11:28Z' - torchscript_onnx_tflite: - inference_time: 107673.0 - throughput: 9.2873793801603 + inference_time: 105787.0 + throughput: 9.452957357709359 estimated_peak_memory_range: - min: 135168 - max: 143135568 + min: 16384 + max: 148985488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 857 - job_id: jgdx8vz6p + job_id: j5q6ljxop + job_status: Passed + torchscript_onnx_qnn: + inference_time: 106648.0 + throughput: 9.376640912159628 + estimated_peak_memory_range: + min: 2781184 + max: 148163056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jgdxdjkzp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:42:35Z' - - torchscript_onnx: - inference_time: 70326.0 - throughput: 14.219492079742912 + timestamp: '2024-12-12T00:11:29Z' + - torchscript_onnx_qnn: + inference_time: 80112.0 + throughput: 12.482524465747954 + estimated_peak_memory_range: + min: 
2768896 + max: 2768896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 922 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 922 + job_id: jpv6lj8r5 + job_status: Passed + torchscript_onnx: + inference_time: 70304.0 + throughput: 14.223941738734638 estimated_peak_memory_range: - min: 125317120 - max: 125317120 + min: 126156800 + max: 126156800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 886 - job_id: jp3jxm6mg + job_id: jgdxdjk6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:42:54Z' + timestamp: '2024-12-12T00:11:34Z' diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index 6ae39c25..d22221de 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index aa45f9bc..b8bb5c93 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DETR-ResNet50 universal_assets: - torchscript_onnx_tflite: mm6v4k2dq - torchscript_onnx: mqexp724n + torchscript_onnx_tflite: mqe7xr3vm + torchscript_onnx: mq36e8d6q performance_metrics: - torchscript_onnx_tflite: - inference_time: 10819.0 - throughput: 92.42998428690267 + inference_time: 10894.0 + throughput: 91.79364787956673 estimated_peak_memory_range: - min: 57344 - max: 25080016 + min: 40960 + max: 17959808 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jp8q676qp + job_id: jped7jnv5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11959.0 + throughput: 83.61903169161302 + estimated_peak_memory_range: + min: 2793472 + max: 24097008 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jprvlz77g job_status: Passed torchscript_onnx: - inference_time: 12164.0 - throughput: 82.20979940808944 + inference_time: 12186.0 + throughput: 82.06138191367143 estimated_peak_memory_range: - min: 16384 - max: 425607648 + min: 12288 + max: 100263960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jp2k4m46p + job_id: jpv6ljyr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:41:52Z' + timestamp: '2024-12-12T00:10:30Z' - torchscript_onnx_tflite: - inference_time: 7712.0 - throughput: 129.66804979253112 + inference_time: 7737.0 + throughput: 129.24906294429366 estimated_peak_memory_range: - min: 69632 - max: 69056432 + min: 49152 + max: 67280144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jgkeoyovg + job_id: jgz3l10x5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8505.0 + throughput: 117.57789535567314 + estimated_peak_memory_range: + min: 2781184 + max: 70255792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp2kr2zqp job_status: Passed torchscript_onnx: - inference_time: 8834.0 - throughput: 113.19900384876613 + inference_time: 8886.0 + 
throughput: 112.53657438667567 estimated_peak_memory_range: - min: 2867200 - max: 227924496 + min: 2887680 + max: 227086480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jpy1qdq0p + job_id: jgjvrj6eg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:41:53Z' + timestamp: '2024-12-12T00:10:31Z' - torchscript_onnx_tflite: - inference_time: 7269.0 - throughput: 137.57050488375293 + inference_time: 7263.0 + throughput: 137.68415255404102 estimated_peak_memory_range: - min: 49152 - max: 67806160 + min: 53248 + max: 68528832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,7 +166,37 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: j5q6z2zep + job_id: jg9lz6q8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6857.0 + throughput: 145.83637159107482 + estimated_peak_memory_range: + min: 2449408 + max: 70323120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jpy1o9ylp + job_status: Passed + torchscript_onnx: + inference_time: 8512.0 + throughput: 117.4812030075188 + estimated_peak_memory_range: + min: 2871296 + max: 97573504 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 767 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 767 + job_id: jped7j0v5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -145,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:41:54Z' + timestamp: '2024-12-12T00:10:32Z' - torchscript_onnx_tflite: - inference_time: 10852.0 - throughput: 92.14891264283081 + inference_time: 10816.0 + throughput: 92.45562130177515 estimated_peak_memory_range: - 
min: 81920 - max: 27033560 + min: 16384 + max: 31128232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -159,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jglvoko25 + job_id: jp14nrm7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 10379.0 + throughput: 96.34839579920994 + estimated_peak_memory_range: + min: 2830336 + max: 4598256 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp0zmnxn5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -168,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:41:31Z' + timestamp: '2024-12-12T00:10:21Z' - torchscript_onnx_tflite: - inference_time: 309163.0 - throughput: 3.2345397088267354 + inference_time: 309062.0 + throughput: 3.235596741106962 estimated_peak_memory_range: - min: 487424 - max: 68336640 + min: 0 + max: 65708208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -182,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: j56yr1rnp + job_id: jgdxdjmzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 310572.0 + throughput: 3.21986528083665 + estimated_peak_memory_range: + min: 188416 + max: 10560976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jgke2jkng job_status: Passed reference_device_info: name: SA7255P ADP @@ -191,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:41:32Z' + timestamp: '2024-12-12T00:10:23Z' - torchscript_onnx_tflite: - inference_time: 10817.0 - throughput: 92.44707405010631 + inference_time: 10799.0 + throughput: 92.60116677470135 estimated_peak_memory_range: - min: 65536 - max: 27262184 + min: 53248 + max: 29424240 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -205,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jp3jxmxmg + job_id: j57yeq895 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 10525.0 + throughput: 95.01187648456057 + estimated_peak_memory_range: + min: 4177920 + max: 5736336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: j5q6ljdop job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -214,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:41:33Z' + timestamp: '2024-12-12T00:10:24Z' - torchscript_onnx_tflite: - inference_time: 16397.0 - throughput: 60.986765871805815 + inference_time: 16473.0 + throughput: 60.7053967097675 estimated_peak_memory_range: min: 53248 - max: 48276480 + max: 49096912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -228,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jgo2ovo1p + job_id: jp4lyz215 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17021.0 + throughput: 58.750954703013925 + estimated_peak_memory_range: + min: 65536 + max: 6179440 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jglvyj9m5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -237,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:41:34Z' + timestamp: '2024-12-12T00:10:25Z' - torchscript_onnx_tflite: - inference_time: 10831.0 - throughput: 92.32757824762257 + inference_time: 10854.0 + throughput: 92.13193292795283 estimated_peak_memory_range: - min: 69632 - max: 34005680 + min: 53248 + max: 27962616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -251,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: 
jpv6ewez5 + job_id: jpxklwzl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 10491.0 + throughput: 95.3197979220284 + estimated_peak_memory_range: + min: 4771840 + max: 5927560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: j56y8k9yp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -260,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:41:35Z' + timestamp: '2024-12-12T00:10:26Z' - torchscript_onnx_tflite: - inference_time: 18402.0 - throughput: 54.341919356591674 + inference_time: 18381.0 + throughput: 54.404004134704316 estimated_peak_memory_range: - min: 69632 - max: 66886608 + min: 0 + max: 66070944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -274,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jgjvolo1g + job_id: j5mn0jl9p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17926.0 + throughput: 55.78489345085351 + estimated_peak_memory_range: + min: 2502656 + max: 13367424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp3jzylng job_status: Passed reference_device_info: name: SA8775P ADP @@ -283,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:41:36Z' + timestamp: '2024-12-12T00:10:28Z' - torchscript_onnx_tflite: - inference_time: 14437.0 - throughput: 69.26646810279144 + inference_time: 14413.0 + throughput: 69.38180808991882 estimated_peak_memory_range: - min: 53248 - max: 49085744 + min: 151552 + max: 47313648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -297,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 788 - job_id: jped8v885 + job_id: jgn6zjwq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 
15200.0 + throughput: 65.78947368421052 + estimated_peak_memory_range: + min: 2785280 + max: 54140944 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jgo2lj7kp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -306,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:41:38Z' - - torchscript_onnx: - inference_time: 13381.0 - throughput: 74.73283013227712 + timestamp: '2024-12-12T00:10:29Z' + - torchscript_onnx_qnn: + inference_time: 10784.0 + throughput: 92.7299703264095 + estimated_peak_memory_range: + min: 2768896 + max: 2768896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp8qelkop + job_status: Passed + torchscript_onnx: + inference_time: 13323.0 + throughput: 75.0581700818134 estimated_peak_memory_range: - min: 82952192 - max: 82952192 + min: 84574208 + max: 84574208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -320,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jp8q674qp + job_id: jgz3l1qx5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -329,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:41:55Z' + timestamp: '2024-12-12T00:10:33Z' diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index 47f8b2a8..4cbe4330 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git 
a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index 81c5d996..2c601a66 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: DETR-ResNet50-DC5 universal_assets: - torchscript_onnx_tflite: mn0x9j4wn - torchscript_onnx: mnjxk42kq + torchscript_onnx_tflite: mnz1v5xdq + torchscript_onnx: mnz1v50oq performance_metrics: - torchscript_onnx_tflite: - inference_time: 84968.0 - throughput: 11.76913661613784 + inference_time: 84142.0 + throughput: 11.884671151149247 estimated_peak_memory_range: - min: 163840 - max: 47802072 + min: 131072 + max: 47025464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jpy1qdvlp + job_id: jglvyj8m5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 86212.0 + throughput: 11.599313320651417 + estimated_peak_memory_range: + min: 2895872 + max: 44766216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp14nr17p job_status: Passed torchscript_onnx: - inference_time: 70957.0 - throughput: 14.093042265033754 + inference_time: 67202.0 + throughput: 14.880509508645575 estimated_peak_memory_range: - min: 118784 - max: 100881800 + min: 98304 + max: 100198504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: j57ykd6n5 + job_id: jp8qelrop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:40:58Z' + timestamp: '2024-12-12T00:09:35Z' - torchscript_onnx_tflite: - inference_time: 60654.0 - throughput: 16.48695881557688 + inference_time: 60393.0 + throughput: 
16.558210388621198 estimated_peak_memory_range: - min: 69632 - max: 155327680 + min: 94208 + max: 158548176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp0zdren5 + job_id: j56y8kmyp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 61378.0 + throughput: 16.29248264850598 + estimated_peak_memory_range: + min: 2813952 + max: 159459952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jgdxdj4zp job_status: Passed torchscript_onnx: - inference_time: 53404.0 - throughput: 18.725189124410157 + inference_time: 53233.0 + throughput: 18.785339920725864 estimated_peak_memory_range: - min: 2670592 - max: 499429248 + min: 1032192 + max: 499068480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jp4lmw825 + job_id: jgke2j0ng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:40:59Z' + timestamp: '2024-12-12T00:09:37Z' - torchscript_onnx_tflite: - inference_time: 49391.0 - throughput: 20.246603632240692 + inference_time: 55840.0 + throughput: 17.908309455587393 estimated_peak_memory_range: - min: 106496 - max: 162353440 + min: 53248 + max: 161772416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp8q67wop + job_id: jp3jzy7ng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 49906.0 + throughput: 20.03767082114375 + estimated_peak_memory_range: + min: 2785280 + max: 164835520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: 
j57yeqn95 job_status: Passed torchscript_onnx: - inference_time: 44861.0 - throughput: 22.291076881924166 + inference_time: 42028.0 + throughput: 23.793661368611403 estimated_peak_memory_range: - min: 0 - max: 281937072 + min: 2895872 + max: 284127728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: jpxk31m85 + job_id: j5q6lj1op job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:41:00Z' + timestamp: '2024-12-12T00:09:38Z' - torchscript_onnx_tflite: - inference_time: 85054.0 - throughput: 11.757236579114444 + inference_time: 84918.0 + throughput: 11.77606632280553 estimated_peak_memory_range: - min: 126976 - max: 53596472 + min: 155648 + max: 42886584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jgkeoyrng + job_id: jgo2ljwkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 98689.0 + throughput: 10.13284155275664 + estimated_peak_memory_range: + min: 3026944 + max: 4612824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp4lyz415 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:40:37Z' + timestamp: '2024-12-12T00:09:26Z' - torchscript_onnx_tflite: - inference_time: 631553.0 - throughput: 1.583398384616968 + inference_time: 631836.0 + throughput: 1.5826891788375466 estimated_peak_memory_range: - min: 159744 - max: 162022416 + min: 167936 + max: 161692448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 
- job_id: j5q6z29op + job_id: jpv6ljmr5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 630971.0 + throughput: 1.584858892088543 + estimated_peak_memory_range: + min: 221184 + max: 9963776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: j5mn0jk9p job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:40:38Z' + timestamp: '2024-12-12T00:09:28Z' - torchscript_onnx_tflite: - inference_time: 84991.0 - throughput: 11.765951689002366 + inference_time: 85060.0 + throughput: 11.75640724194686 estimated_peak_memory_range: - min: 114688 - max: 47468168 + min: 163840 + max: 47374264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jglvok7m5 + job_id: jgjvrjyeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 99867.0 + throughput: 10.013317712557702 + estimated_peak_memory_range: + min: 3059712 + max: 4278408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jgn6zjqq5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:40:39Z' + timestamp: '2024-12-12T00:09:30Z' - torchscript_onnx_tflite: - inference_time: 94835.0 - throughput: 10.54463014709759 + inference_time: 94960.0 + throughput: 10.530749789385004 estimated_peak_memory_range: - min: 172032 - max: 143335552 + min: 0 + max: 143105952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: j56yr1vyp + job_id: jped7jxv5 + job_status: Passed + torchscript_onnx_qnn: + 
inference_time: 75536.0 + throughput: 13.238720610040247 + estimated_peak_memory_range: + min: 2781184 + max: 8904544 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jprvlzd7g job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:40:41Z' + timestamp: '2024-12-12T00:09:31Z' - torchscript_onnx_tflite: - inference_time: 86776.0 - throughput: 11.52392366552964 + inference_time: 84981.0 + throughput: 11.767336228098046 estimated_peak_memory_range: - min: 131072 - max: 56137360 + min: 77824 + max: 47237224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jp3jxm8ng + job_id: jgz3l1yx5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 96967.0 + throughput: 10.312786824383553 + estimated_peak_memory_range: + min: 2846720 + max: 4382200 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp2kr2dqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:40:42Z' + timestamp: '2024-12-12T00:09:32Z' - torchscript_onnx_tflite: - inference_time: 95923.0 - throughput: 10.425028408202412 + inference_time: 95753.0 + throughput: 10.443537017116958 estimated_peak_memory_range: - min: 131072 - max: 161663568 + min: 155648 + max: 161866960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jgo2ovmkp + job_id: j5weljzm5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 98506.0 + throughput: 10.151665888372282 + 
estimated_peak_memory_range: + min: 3620864 + max: 9430800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jpy1o92lp job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:40:43Z' + timestamp: '2024-12-12T00:09:33Z' - torchscript_onnx_tflite: - inference_time: 98287.0 - throughput: 10.174285510800004 + inference_time: 96967.0 + throughput: 10.312786824383553 estimated_peak_memory_range: - min: 139264 - max: 141578352 + min: 73728 + max: 140621168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 789 - job_id: jpv6ew4r5 + job_id: jg9lz628g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 95163.0 + throughput: 10.508285783340163 + estimated_peak_memory_range: + min: 0 + max: 138877200 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jp0zmn9n5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:40:44Z' - - torchscript_onnx: - inference_time: 65507.0 - throughput: 15.265544140320882 + timestamp: '2024-12-12T00:09:34Z' + - torchscript_onnx_qnn: + inference_time: 76074.0 + throughput: 13.145095564844757 + estimated_peak_memory_range: + min: 2768896 + max: 2768896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 803 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 803 + job_id: jpxklwrl5 + job_status: Passed + torchscript_onnx: + inference_time: 65523.0 + throughput: 15.261816461395235 estimated_peak_memory_range: - min: 86622208 - max: 86622208 + min: 86671360 + max: 86671360 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 767 - job_id: j5mnoz47p + job_id: jglvyjqm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:41:02Z' + timestamp: '2024-12-12T00:09:38Z' diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index f21ff008..4162ae7e 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: EfficientNet-B0 universal_assets: - torchscript_onnx_tflite: mnzv412zq - torchscript_onnx: mm6v4k52q + torchscript_onnx_tflite: mno63yjgn + torchscript_onnx: mqp3z7jlm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1613.0 - throughput: 619.9628022318661 + inference_time: 1604.0 + throughput: 623.4413965087282 estimated_peak_memory_range: min: 16384 - max: 84612040 + max: 97907360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp1478d7p + job_id: jgke2j2wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1686.0 - throughput: 593.1198102016607 + inference_time: 1663.0 + throughput: 601.3229104028864 estimated_peak_memory_range: - min: 20480 - max: 343996264 + min: 12288 + max: 85123432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp0zdr1n5 + job_id: j5weljy35 job_status: Passed torchscript_onnx: - inference_time: 1580.0 - throughput: 632.9113924050633 + inference_time: 1615.0 + throughput: 619.1950464396285 estimated_peak_memory_range: - min: 24576 - max: 248587296 + min: 626688 + max: 2184008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgz387vx5 + job_id: j5mn0j99p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:40:04Z' + timestamp: '2024-12-12T00:08:42Z' - torchscript_onnx_tflite: - inference_time: 1129.0 - throughput: 885.7395925597874 + inference_time: 1131.0 + throughput: 884.1732979664014 estimated_peak_memory_range: min: 16384 - max: 19289360 + max: 21344944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgdx8vrzp + job_id: j5q6ljlnp job_status: Passed torchscript_onnx_qnn: inference_time: 1176.0 throughput: 850.3401360544218 estimated_peak_memory_range: min: 0 - max: 20932016 + max: 20874016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp8q673op + job_id: jg9lz6owg job_status: Passed torchscript_onnx: - inference_time: 1158.0 - throughput: 863.5578583765113 + inference_time: 1133.0 + throughput: 882.61253309797 estimated_peak_memory_range: min: 0 - max: 85670608 + max: 90635408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5we89mm5 + job_id: jgn6zj1q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:40:05Z' + timestamp: '2024-12-12T00:08:43Z' - torchscript_onnx_tflite: - inference_time: 1123.0 - throughput: 890.4719501335708 + inference_time: 1124.0 + throughput: 889.6797153024911 estimated_peak_memory_range: - min: 0 - max: 15345824 + min: 12288 + max: 19015696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j57ykdj95 + job_id: jglvyjyj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1179.0 - throughput: 848.1764206955047 + inference_time: 955.0 + throughput: 1047.1204188481674 estimated_peak_memory_range: - min: 0 - max: 15345968 + min: 614400 + max: 18394656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jgkeoylng + job_id: jp14nro8p job_status: Passed torchscript_onnx: - inference_time: 1178.0 - throughput: 848.8964346349745 + inference_time: 963.0 + throughput: 1038.4215991692627 estimated_peak_memory_range: min: 0 - max: 36194016 + max: 38849760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jg9lk498g + job_id: jprvlzx7g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:40:06Z' + timestamp: '2024-12-12T00:08:44Z' - torchscript_onnx_tflite: - inference_time: 1602.0 - throughput: 624.2197253433209 + inference_time: 1606.0 + throughput: 622.66500622665 estimated_peak_memory_range: - min: 0 - max: 96469992 + min: 32768 + max: 95353216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp4lmwx15 + job_id: j56y8k86p job_status: Passed torchscript_onnx_qnn: - inference_time: 1561.0 - throughput: 640.6149903907751 + inference_time: 1571.0 + throughput: 636.5372374283895 estimated_peak_memory_range: - min: 634880 - max: 2439120 + min: 630784 + max: 2183408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j5q6z27op + job_id: jgdxdj6rp job_status: Passed reference_device_info: 
name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:39:55Z' + timestamp: '2024-12-12T00:08:33Z' - torchscript_onnx_tflite: - inference_time: 22567.0 - throughput: 44.31249169140781 + inference_time: 22489.0 + throughput: 44.46618346747299 estimated_peak_memory_range: - min: 53248 - max: 15437632 + min: 4096 + max: 17098896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpxk317l5 + job_id: jp3jzyz3g job_status: Passed torchscript_onnx_qnn: - inference_time: 22504.0 - throughput: 44.436544614290796 + inference_time: 22489.0 + throughput: 44.46618346747299 estimated_peak_memory_range: - min: 729088 - max: 6374352 + min: 548864 + max: 11056752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j56yr1qyp + job_id: jg9lz6o8g job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:39:57Z' + timestamp: '2024-12-12T00:08:35Z' - torchscript_onnx_tflite: - inference_time: 1603.0 - throughput: 623.8303181534623 + inference_time: 1612.0 + throughput: 620.3473945409429 estimated_peak_memory_range: min: 16384 - max: 95947864 + max: 96413400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5mnozw9p + job_id: jgo2ljlqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1574.0 - throughput: 635.3240152477764 + inference_time: 1572.0 + throughput: 636.1323155216285 estimated_peak_memory_range: min: 634880 - max: 2261304 + max: 1830888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jp3jxmqng 
+ job_id: jp14nro7p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:39:58Z' + timestamp: '2024-12-12T00:08:36Z' - torchscript_onnx_tflite: - inference_time: 3784.0 - throughput: 264.2706131078224 + inference_time: 3800.0 + throughput: 263.1578947368421 estimated_peak_memory_range: min: 16384 - max: 16034192 + max: 16967664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgn6oe9q5 + job_id: jpv6lj2k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3675.0 - throughput: 272.10884353741494 + inference_time: 3805.0 + throughput: 262.8120893561104 estimated_peak_memory_range: min: 0 - max: 6007760 + max: 6104992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jgo2ovekp + job_id: jgdxdj6zp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:39:59Z' + timestamp: '2024-12-12T00:08:37Z' - torchscript_onnx_tflite: - inference_time: 1613.0 - throughput: 619.9628022318661 + inference_time: 1602.0 + throughput: 624.2197253433209 estimated_peak_memory_range: - min: 20480 - max: 95890256 + min: 28672 + max: 95763896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jprvoy47g + job_id: jgjvrj3vg job_status: Passed torchscript_onnx_qnn: - inference_time: 1570.0 - throughput: 636.9426751592357 + inference_time: 1574.0 + throughput: 635.3240152477764 estimated_peak_memory_range: - min: 638976 - max: 2280120 + min: 659456 + max: 1937224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 243 - job_id: jpv6ewzr5 + job_id: j57yeqo95 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:40:00Z' + timestamp: '2024-12-12T00:08:39Z' - torchscript_onnx_tflite: - inference_time: 2556.0 - throughput: 391.23630672926447 + inference_time: 2577.0 + throughput: 388.04811796662784 estimated_peak_memory_range: - min: 16384 - max: 16356832 + min: 20480 + max: 19279936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp2k4m7qp + job_id: jped7j6o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2683.0 - throughput: 372.71710771524414 + inference_time: 2733.0 + throughput: 365.89828027808267 estimated_peak_memory_range: - min: 622592 - max: 6518880 + min: 618496 + max: 6429840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jgjvolkeg + job_id: jp4lyze15 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:40:02Z' + timestamp: '2024-12-12T00:08:40Z' - torchscript_onnx_tflite: - inference_time: 3083.0 - throughput: 324.3593902043464 + inference_time: 3095.0 + throughput: 323.10177705977384 estimated_peak_memory_range: min: 16384 - max: 24225296 + max: 30108304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpy1qd4lp + job_id: jgz3l1zo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3163.0 - throughput: 316.1555485298767 + inference_time: 3176.0 + throughput: 314.86146095717885 estimated_peak_memory_range: - min: 782336 - max: 25989168 + min: 618496 + max: 28569440 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jped8v4v5 + job_id: jpxklw0l5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:40:03Z' + timestamp: '2024-12-12T00:08:41Z' - torchscript_onnx_qnn: - inference_time: 1782.0 - throughput: 561.1672278338945 + inference_time: 1746.0 + throughput: 572.737686139748 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jglvokem5 + job_id: j5weljym5 job_status: Passed torchscript_onnx: - inference_time: 1694.0 - throughput: 590.318772136954 + inference_time: 1663.0 + throughput: 601.3229104028864 estimated_peak_memory_range: - min: 15724544 - max: 15724544 + min: 15618048 + max: 15618048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp1478q7p + job_id: jp2kr2oqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:40:07Z' + timestamp: '2024-12-12T00:08:45Z' diff --git a/qai_hub_models/models/efficientnet_b4/perf.yaml b/qai_hub_models/models/efficientnet_b4/perf.yaml index 7243098f..39347c82 100644 --- a/qai_hub_models/models/efficientnet_b4/perf.yaml +++ b/qai_hub_models/models/efficientnet_b4/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: EfficientNet-B4 universal_assets: - torchscript_onnx_tflite: mmr6234xm - torchscript_onnx: mn7lkx08q + torchscript_onnx_tflite: mmxe7wgkn + torchscript_onnx: mqe7xrg7m performance_metrics: - torchscript_onnx_tflite: - inference_time: 3619.0 - throughput: 276.3194252555955 + inference_time: 3632.0 + throughput: 
275.3303964757709 estimated_peak_memory_range: - min: 20480 - max: 293714048 + min: 16384 + max: 293559488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jpv6ewwr5 + job_id: jpxklw435 job_status: Passed torchscript_onnx_qnn: inference_time: 3731.0 throughput: 268.0246582685607 estimated_peak_memory_range: min: 12288 - max: 250638824 + max: 251522360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 480 - job_id: jpxk311l5 + job_id: jglvyjxj5 job_status: Passed torchscript_onnx: - inference_time: 3562.0 - throughput: 280.74115665356544 + inference_time: 3485.0 + throughput: 286.94404591104734 estimated_peak_memory_range: - min: 20480 - max: 52296984 + min: 16384 + max: 51919952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: j56yr13yp + job_id: j57yeqev5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:39:17Z' + timestamp: '2024-12-12T00:07:55Z' - torchscript_onnx_tflite: - inference_time: 2635.0 - throughput: 379.5066413662239 + inference_time: 2607.0 + throughput: 383.5826620636747 estimated_peak_memory_range: min: 16384 - max: 26898704 + max: 30224944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jgjvolleg + job_id: j5mn0jmdp job_status: Passed torchscript_onnx_qnn: - inference_time: 2691.0 - throughput: 371.6090672612412 + inference_time: 2675.0 + throughput: 373.8317757009346 estimated_peak_memory_range: - min: 618496 - max: 26535680 + min: 0 + max: 29791968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 480 - job_id: j5mnozz9p + job_id: j56y8k76p job_status: Passed torchscript_onnx: - inference_time: 2586.0 - throughput: 386.69760247486465 + inference_time: 2612.0 + throughput: 382.84839203675347 estimated_peak_memory_range: min: 0 - max: 171247376 + max: 172514128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jp3jxm4ng + job_id: jp4lyzy85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:39:18Z' + timestamp: '2024-12-12T00:07:57Z' - torchscript_onnx_tflite: - inference_time: 2123.0 - throughput: 471.03155911446066 + inference_time: 2106.0 + throughput: 474.8338081671415 estimated_peak_memory_range: min: 12288 - max: 27147984 + max: 29217024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jped8vvv5 + job_id: jgn6zjnk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2547.0 - throughput: 392.61876717707105 + inference_time: 2165.0 + throughput: 461.8937644341801 estimated_peak_memory_range: min: 0 - max: 25943568 + max: 27504576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 480 - job_id: jgn6oeeq5 + job_id: jp3jzy93g job_status: Passed torchscript_onnx: - inference_time: 2504.0 - throughput: 399.36102236421726 + inference_time: 2135.0 + throughput: 468.384074941452 estimated_peak_memory_range: - min: 614400 - max: 72580128 + min: 0 + max: 73986688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jgo2ov1kp + job_id: jpxklwl35 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 
@@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:39:19Z' + timestamp: '2024-12-12T00:07:58Z' - torchscript_onnx_tflite: - inference_time: 3620.0 - throughput: 276.24309392265195 + inference_time: 3605.0 + throughput: 277.39251040221916 estimated_peak_memory_range: - min: 16384 - max: 294148864 + min: 28672 + max: 293851544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jgz3877x5 + job_id: jprvlz00g job_status: Passed torchscript_onnx_qnn: - inference_time: 3337.0 - throughput: 299.6703626011387 + inference_time: 3345.0 + throughput: 298.9536621823617 estimated_peak_memory_range: - min: 630784 - max: 1979568 + min: 651264 + max: 2179968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 480 - job_id: jprvoyy7g + job_id: jgo2ljrqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:39:08Z' + timestamp: '2024-12-12T00:07:46Z' - torchscript_onnx_tflite: - inference_time: 7240.0 - throughput: 138.12154696132598 + inference_time: 7276.0 + throughput: 137.43815283122595 estimated_peak_memory_range: - min: 20480 - max: 37348416 + min: 24576 + max: 38395440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jp4lmww15 + job_id: j5q6ljynp job_status: Passed torchscript_onnx_qnn: - inference_time: 7454.0 - throughput: 134.15615776764153 + inference_time: 7414.0 + throughput: 134.87995683841382 estimated_peak_memory_range: min: 0 - max: 37902576 + max: 37629776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 480 - job_id: jglvok0m5 + job_id: 
jgdxdjdrp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -281,10 +281,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:39:16Z' + timestamp: '2024-12-12T00:07:54Z' - torchscript_onnx_qnn: - inference_time: 3627.0 - throughput: 275.70995312930796 + inference_time: 3607.0 + throughput: 277.2387025228722 estimated_peak_memory_range: min: 602112 max: 602112 @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 480 - job_id: jp2k4mmqp + job_id: jpv6ljlk5 job_status: Passed torchscript_onnx: - inference_time: 3746.0 - throughput: 266.95141484249865 + inference_time: 3699.0 + throughput: 270.3433360367667 estimated_peak_memory_range: - min: 47886336 - max: 47886336 + min: 48967680 + max: 48967680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 482 - job_id: jpv6ew1r5 + job_id: j5mn0j0dp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -319,4 +319,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:39:20Z' + timestamp: '2024-12-12T00:07:59Z' diff --git a/qai_hub_models/models/efficientnet_v2_s/perf.yaml b/qai_hub_models/models/efficientnet_v2_s/perf.yaml index 029984ee..5bdecc66 100644 --- a/qai_hub_models/models/efficientnet_v2_s/perf.yaml +++ b/qai_hub_models/models/efficientnet_v2_s/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: EfficientNet-V2-s universal_assets: - torchscript_onnx_tflite: mqy3wv6xm - torchscript_onnx: mqy3wv1xm + torchscript_onnx_tflite: mqyv3zx7q + torchscript_onnx: mnl6v4kkn performance_metrics: - torchscript_onnx_tflite: - inference_time: 2795.0 - throughput: 357.78175313059035 + inference_time: 2775.0 + throughput: 360.36036036036035 estimated_peak_memory_range: - min: 20480 - max: 232611816 + min: 24576 + max: 233071416 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jglvok2j5 + job_id: jgz3l1no5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2883.0 - throughput: 346.860908775581 + inference_time: 2878.0 + throughput: 347.46351633078524 estimated_peak_memory_range: min: 16384 - max: 179128656 + max: 179076128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: jp1478e8p + job_id: jprvlzw0g job_status: Passed torchscript_onnx: - inference_time: 2885.0 - throughput: 346.6204506065858 + inference_time: 2903.0 + throughput: 344.47123665173956 estimated_peak_memory_range: - min: 12288 - max: 51896032 + min: 20480 + max: 52066872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jprvoye7g + job_id: jpv6ljdk5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:38:27Z' + timestamp: '2024-12-12T00:07:06Z' - torchscript_onnx_tflite: inference_time: 2051.0 throughput: 487.56704046806436 estimated_peak_memory_range: min: 16384 - max: 25792080 + max: 27515568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: j56yr1z6p + job_id: j5welj435 job_status: Passed torchscript_onnx_qnn: - inference_time: 2113.0 - throughput: 473.260766682442 + inference_time: 2114.0 + throughput: 473.0368968779565 estimated_peak_memory_range: - min: 618496 - max: 27916352 + min: 0 + max: 26891648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: jgdx8vorp + job_id: jp2kr2erp job_status: Passed torchscript_onnx: - inference_time: 2163.0 - throughput: 
462.32085067036525 + inference_time: 2140.0 + throughput: 467.2897196261682 estimated_peak_memory_range: min: 0 - max: 204003360 + max: 204745136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jp2k4mlqp + job_id: jgjvrj7vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:38:28Z' + timestamp: '2024-12-12T00:07:07Z' - torchscript_onnx_tflite: - inference_time: 1637.0 - throughput: 610.8735491753207 + inference_time: 1940.0 + throughput: 515.4639175257732 estimated_peak_memory_range: - min: 36864 - max: 28952112 + min: 12288 + max: 29743696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jp3jxm13g + job_id: jg9lz6dwg job_status: Passed torchscript_onnx_qnn: - inference_time: 1734.0 - throughput: 576.7012687427913 + inference_time: 2011.0 + throughput: 497.2650422675286 estimated_peak_memory_range: min: 0 - max: 27378352 + max: 28359728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: j5we89qm5 + job_id: jpy1o9m8p job_status: Passed torchscript_onnx: - inference_time: 1752.0 - throughput: 570.7762557077625 + inference_time: 2044.0 + throughput: 489.23679060665364 estimated_peak_memory_range: min: 0 - max: 67828320 + max: 68968128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jpy1qd6lp + job_id: jped7jzo5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:38:29Z' + timestamp: '2024-12-12T00:07:08Z' - 
torchscript_onnx_tflite: - inference_time: 2781.0 - throughput: 359.5828838547285 + inference_time: 2767.0 + throughput: 361.4022406938923 estimated_peak_memory_range: - min: 16384 - max: 233126440 + min: 2539520 + max: 235697640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jgo2ovnqp + job_id: jp14nr68p job_status: Passed torchscript_onnx_qnn: - inference_time: 2745.0 - throughput: 364.29872495446267 + inference_time: 2755.0 + throughput: 362.9764065335753 estimated_peak_memory_range: - min: 626688 - max: 1767272 + min: 634880 + max: 1901312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: jg9lk4w8g + job_id: jp0zmn695 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:38:18Z' + timestamp: '2024-12-12T00:06:56Z' - torchscript_onnx_tflite: - inference_time: 5190.0 - throughput: 192.67822736030828 + inference_time: 5263.0 + throughput: 190.00570017100512 estimated_peak_memory_range: min: 20480 - max: 35730544 + max: 35259664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jg9lk4wwg + job_id: jgn6zjkk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5295.0 - throughput: 188.85741265344666 + inference_time: 5417.0 + throughput: 184.60402436773123 estimated_peak_memory_range: - min: 618496 - max: 34551120 + min: 0 + max: 37585648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: jgn6oe3q5 + job_id: jgo2lj8qp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -281,10 +281,10 @@ models: os_name: Android manufacturer: Qualcomm 
chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:38:26Z' + timestamp: '2024-12-12T00:07:04Z' - torchscript_onnx_qnn: - inference_time: 3009.0 - throughput: 332.33632436025255 + inference_time: 3018.0 + throughput: 331.3452617627568 estimated_peak_memory_range: min: 602112 max: 602112 @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 506 - job_id: jp1478e7p + job_id: jp8qel1kp job_status: Passed torchscript_onnx: - inference_time: 3047.0 - throughput: 328.1916639317361 + inference_time: 3091.0 + throughput: 323.51989647363314 estimated_peak_memory_range: - min: 49750016 - max: 49750016 + min: 51331072 + max: 51331072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 508 - job_id: jp0zdrln5 + job_id: jgz3l1mo5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -319,4 +319,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:38:30Z' + timestamp: '2024-12-12T00:07:09Z' diff --git a/qai_hub_models/models/efficientvit_b2_cls/perf.yaml b/qai_hub_models/models/efficientvit_b2_cls/perf.yaml index 1fabd37e..46f158b2 100644 --- a/qai_hub_models/models/efficientvit_b2_cls/perf.yaml +++ b/qai_hub_models/models/efficientvit_b2_cls/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: EfficientViT-b2-cls universal_assets: - torchscript_onnx_tflite: mmr6233xm - torchscript_onnx: mq8kxdkpq + torchscript_onnx_tflite: mm6kvdo4n + torchscript_onnx: mq36e82lq performance_metrics: - torchscript_onnx_tflite: - inference_time: 7746.0 - throughput: 129.09888974954816 + inference_time: 7799.0 + throughput: 128.22156686754712 estimated_peak_memory_range: - min: 32768 - max: 86406776 + min: 24576 + max: 233089264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 379 - job_id: jprvoy80g + job_id: jpv6ljej5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 7520.0 - throughput: 132.9787234042553 + inference_time: 7526.0 + throughput: 132.87270794578794 estimated_peak_memory_range: - min: 32768 - max: 192558800 + min: 36864 + max: 224093528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: jgo2ovkqp + job_id: jgdxdjzrp job_status: Passed torchscript_onnx: - inference_time: 7058.0 - throughput: 141.68319637291017 + inference_time: 7112.0 + throughput: 140.607424071991 estimated_peak_memory_range: - min: 12288 - max: 60569448 + min: 32768 + max: 60237368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: j5mnoz6dp + job_id: jgke2j9wg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:37:37Z' + timestamp: '2024-12-12T00:06:15Z' - torchscript_onnx_tflite: - inference_time: 5236.0 - throughput: 190.98548510313216 + inference_time: 5200.0 + throughput: 192.30769230769232 estimated_peak_memory_range: min: 20480 - max: 33987488 + max: 37576144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 379 - job_id: jp2k4m0rp + job_id: jgjvrjoxg job_status: Passed torchscript_onnx_qnn: - inference_time: 4975.0 - throughput: 201.00502512562815 + inference_time: 4967.0 + throughput: 201.32876988121603 estimated_peak_memory_range: min: 618496 - max: 37632432 + max: 38325584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: jpv6ewrk5 + job_id: j57yeq7v5 job_status: Passed torchscript_onnx: - inference_time: 4858.0 - throughput: 205.8460271716756 + inference_time: 4867.0 + throughput: 205.4653790836244 
estimated_peak_memory_range: - min: 0 - max: 193879984 + min: 49152 + max: 194131952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jgn6oemk5 + job_id: j5q6ljmnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:37:38Z' + timestamp: '2024-12-12T00:06:16Z' - torchscript_onnx_tflite: - inference_time: 5209.0 - throughput: 191.9754271453254 + inference_time: 4385.0 + throughput: 228.05017103762827 estimated_peak_memory_range: min: 12288 - max: 37094368 + max: 38947120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 379 - job_id: jpy1qdr8p + job_id: jgz3l18k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4338.0 - throughput: 230.52097740894422 + inference_time: 5308.0 + throughput: 188.39487565938205 estimated_peak_memory_range: min: 614400 - max: 35975296 + max: 37523104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: jped8vwo5 + job_id: jp4lyz985 job_status: Passed torchscript_onnx: - inference_time: 3846.0 - throughput: 260.01040041601664 + inference_time: 4539.0 + throughput: 220.3128442388191 estimated_peak_memory_range: - min: 0 - max: 57492672 + min: 626688 + max: 60912720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jprvoy20g + job_id: jglvyj1j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:37:39Z' + timestamp: '2024-12-12T00:06:18Z' - torchscript_onnx_tflite: - inference_time: 7625.0 - 
throughput: 131.14754098360655 + inference_time: 7597.0 + throughput: 131.6309069369488 estimated_peak_memory_range: - min: 24576 - max: 253694008 + min: 28672 + max: 256319272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 379 - job_id: jp0zdr395 + job_id: j5welj165 job_status: Passed torchscript_onnx_qnn: - inference_time: 7180.0 - throughput: 139.27576601671308 + inference_time: 7205.0 + throughput: 138.79250520471894 estimated_peak_memory_range: - min: 630784 - max: 1943248 + min: 647168 + max: 2156304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: jgz387jo5 + job_id: jpxklwd35 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:37:27Z' + timestamp: '2024-12-12T00:06:07Z' - torchscript_onnx_tflite: - inference_time: 9027.0 - throughput: 110.77877478675086 + inference_time: 8959.0 + throughput: 111.61960040183057 estimated_peak_memory_range: - min: 36864 - max: 35713856 + min: 32768 + max: 37766768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 379 - job_id: jp3jxmv3g + job_id: jp14nrv8p job_status: Passed torchscript_onnx_qnn: - inference_time: 8624.0 - throughput: 115.95547309833024 + inference_time: 8589.0 + throughput: 116.42798928862499 estimated_peak_memory_range: min: 0 - max: 36995952 + max: 36188192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: jpxk31635 + job_id: jp8qel4kp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -281,10 +281,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:37:36Z' + 
timestamp: '2024-12-12T00:06:14Z' - torchscript_onnx_qnn: - inference_time: 7690.0 - throughput: 130.03901170351105 + inference_time: 7689.0 + throughput: 130.05592404734037 estimated_peak_memory_range: min: 602112 max: 602112 @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 398 - job_id: j5we89335 + job_id: j5mn0jddp job_status: Passed torchscript_onnx: - inference_time: 7938.0 - throughput: 125.97631645250694 + inference_time: 7900.0 + throughput: 126.58227848101266 estimated_peak_memory_range: - min: 51924992 - max: 51924992 + min: 53264384 + max: 53264384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jp2k4m9rp + job_id: j56y8kd6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -319,4 +319,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:37:40Z' + timestamp: '2024-12-12T00:06:19Z' diff --git a/qai_hub_models/models/efficientvit_l2_cls/perf.yaml b/qai_hub_models/models/efficientvit_l2_cls/perf.yaml index aa08be16..74942388 100644 --- a/qai_hub_models/models/efficientvit_l2_cls/perf.yaml +++ b/qai_hub_models/models/efficientvit_l2_cls/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: EfficientViT-l2-cls universal_assets: - torchscript_onnx_tflite: mn7lkxxoq - torchscript_onnx: mnjxk4pyq + torchscript_onnx_tflite: mn0jxz9xm + torchscript_onnx: mqp3z72om performance_metrics: - torchscript_onnx_tflite: - inference_time: 22123.0 - throughput: 45.201826153776615 + inference_time: 22194.0 + throughput: 45.05722267279445 estimated_peak_memory_range: - min: 0 - max: 29104784 + min: 53248 + max: 29900664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 675 - job_id: jg9lk40wg + job_id: jp8qel8xp job_status: Passed torchscript_onnx_qnn: - inference_time: 21782.0 - throughput: 
45.909466531998895 + inference_time: 21893.0 + throughput: 45.676700315169235 estimated_peak_memory_range: - min: 53248 - max: 28610536 + min: 667648 + max: 28853344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: jpy1qdz8p + job_id: jgz3l1wk5 job_status: Passed torchscript_onnx: - inference_time: 15786.0 - throughput: 63.34726973267452 + inference_time: 15874.0 + throughput: 62.996094242156985 estimated_peak_memory_range: - min: 16384 - max: 169918376 + min: 45056 + max: 170167992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 708 - job_id: jped8veo5 + job_id: jp2kr244p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:36:48Z' + timestamp: '2024-12-12T00:05:27Z' - torchscript_onnx_tflite: - inference_time: 15852.0 - throughput: 63.083522583901086 + inference_time: 15853.0 + throughput: 63.07954330410648 estimated_peak_memory_range: - min: 36864 - max: 89353856 + min: 20480 + max: 91318160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 675 - job_id: jp147828p + job_id: jgke2jd2g job_status: Passed torchscript_onnx_qnn: - inference_time: 15628.0 - throughput: 63.9877143588431 + inference_time: 15528.0 + throughput: 64.39979392065945 estimated_peak_memory_range: - min: 643072 - max: 88596704 + min: 68235264 + max: 162082576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: jp0zdr495 + job_id: j5welj865 job_status: Passed torchscript_onnx: - inference_time: 10917.0 - throughput: 91.60025648071814 + inference_time: 10839.0 + throughput: 92.25943352707814 
estimated_peak_memory_range: - min: 663552 - max: 369888896 + min: 0 + max: 370427312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 708 - job_id: jgz387oo5 + job_id: jpy1o9q7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:36:49Z' + timestamp: '2024-12-12T00:05:28Z' - torchscript_onnx_tflite: - inference_time: 17053.0 - throughput: 58.64070837975723 + inference_time: 13992.0 + throughput: 71.4694110920526 estimated_peak_memory_range: - min: 16384 - max: 92749280 + min: 24576 + max: 93208672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 675 - job_id: jgdx8vnrp + job_id: j5q6ljw4p job_status: Passed torchscript_onnx_qnn: - inference_time: 16838.0 - throughput: 59.38947618482005 + inference_time: 16869.0 + throughput: 59.280336712312526 estimated_peak_memory_range: min: 614400 - max: 94273968 + max: 94980672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: jp8q672kp + job_id: jg9lz6klg job_status: Passed torchscript_onnx: - inference_time: 8192.0 - throughput: 122.0703125 + inference_time: 9693.0 + throughput: 103.16723408645414 estimated_peak_memory_range: - min: 229376 - max: 151469040 + min: 0 + max: 152613040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 708 - job_id: j5we89235 + job_id: jp0zmnd65 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:36:50Z' + timestamp: '2024-12-12T00:05:29Z' - torchscript_onnx_tflite: - 
inference_time: 22106.0 - throughput: 45.236587351850176 + inference_time: 22082.0 + throughput: 45.28575310207409 estimated_peak_memory_range: - min: 24576 - max: 29054992 + min: 36864 + max: 26628448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 675 - job_id: j57ykd2v5 + job_id: jglvyj785 job_status: Passed torchscript_onnx_qnn: - inference_time: 15259.0 - throughput: 65.53509404285995 + inference_time: 15269.0 + throughput: 65.49217368524461 estimated_peak_memory_range: - min: 684032 - max: 1925200 + min: 626688 + max: 1853728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: jgkeoyvwg + job_id: jp14nr72p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:36:39Z' + timestamp: '2024-12-12T00:05:17Z' - torchscript_onnx_tflite: - inference_time: 20125.0 - throughput: 49.68944099378882 + inference_time: 20009.0 + throughput: 49.9775101204458 estimated_peak_memory_range: - min: 49152 - max: 66190816 + min: 16384 + max: 66901296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 675 - job_id: jp2k4mxrp + job_id: jped7j215 job_status: Passed torchscript_onnx_qnn: - inference_time: 19896.0 - throughput: 50.26135906714918 + inference_time: 20067.0 + throughput: 49.83305925150745 estimated_peak_memory_range: - min: 618496 - max: 69623440 + min: 0 + max: 70188112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: jgjvolzvg + job_id: jprvlzo9g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -281,10 +281,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 
Proxy - timestamp: '2024-11-26T02:36:47Z' + timestamp: '2024-12-12T00:05:26Z' - torchscript_onnx_qnn: - inference_time: 16145.0 - throughput: 61.93868070610096 + inference_time: 16168.0 + throughput: 61.85056902523503 estimated_peak_memory_range: min: 602112 max: 602112 @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 730 - job_id: j5q6z20np + job_id: jgdxdj8ep job_status: Passed torchscript_onnx: - inference_time: 17288.0 - throughput: 57.84359093012494 + inference_time: 17324.0 + throughput: 57.723389517432466 estimated_peak_memory_range: - min: 136552448 - max: 136552448 + min: 137457664 + max: 137457664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 708 - job_id: jg9lk4jwg + job_id: jp8qel6xp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -319,4 +319,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:36:51Z' + timestamp: '2024-12-12T00:05:30Z' diff --git a/qai_hub_models/models/efficientvit_l2_seg/perf.yaml b/qai_hub_models/models/efficientvit_l2_seg/perf.yaml index eff922e1..c20e6b0c 100644 --- a/qai_hub_models/models/efficientvit_l2_seg/perf.yaml +++ b/qai_hub_models/models/efficientvit_l2_seg/perf.yaml @@ -44,14 +44,14 @@ aggregated: models: - name: EfficientViT-l2-seg universal_assets: - torchscript_onnx: mn1z8z94m + torchscript_onnx: mn0jxz7xm performance_metrics: - torchscript_onnx_qnn: - inference_time: 22359130.0 - throughput: 0.044724459314830226 + inference_time: 22445427.0 + throughput: 0.044552505060384905 estimated_peak_memory_range: - min: 25272320 - max: 130493288 + min: 25325568 + max: 133598056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -59,14 +59,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 773 - job_id: j57ykdyv5 + job_id: jp3jzyqlg job_status: Passed torchscript_onnx: - inference_time: 1693532.0 - throughput: 
0.5904819040915672 + inference_time: 1763476.0 + throughput: 0.5670618709866196 estimated_peak_memory_range: - min: 125976576 - max: 129400616 + min: 123842560 + max: 128627816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -74,7 +74,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 756 - job_id: jglvokvj5 + job_id: jp4lyz8v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,13 +83,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:35:54Z' + timestamp: '2024-12-12T00:04:33Z' - torchscript_onnx_qnn: - inference_time: 16346762.0 - throughput: 0.061174194620316855 + inference_time: 16390540.0 + throughput: 0.06101080257270352 estimated_peak_memory_range: min: 25182208 - max: 491689680 + max: 495877488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -97,14 +97,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 773 - job_id: jp4lmwl85 + job_id: jgo2ljexp job_status: Passed torchscript_onnx: - inference_time: 1672202.0 - throughput: 0.598013876313986 + inference_time: 1596237.0 + throughput: 0.6264733870972794 estimated_peak_memory_range: - min: 116760576 - max: 2736884960 + min: 117387264 + max: 2740382528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -112,7 +112,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 756 - job_id: j56yr1y6p + job_id: jpxklwm15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,13 +121,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:35:55Z' + timestamp: '2024-12-12T00:04:34Z' - torchscript_onnx_qnn: - inference_time: 19129435.0 - throughput: 0.052275459259512894 + inference_time: 19079984.0 + throughput: 0.05241094541798358 estimated_peak_memory_range: - min: 25178112 - max: 542654832 + min: 3211264 + max: 520426192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -135,14 +135,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 773 - job_id: j5mnozndp + job_id: jpv6ljzj5 job_status: Passed torchscript_onnx: - inference_time: 1250808.0 - throughput: 0.7994832140504378 + inference_time: 1195574.0 + throughput: 0.8364183229143491 estimated_peak_memory_range: - min: 89673728 - max: 1147476624 + min: 65011712 + max: 1121502336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -150,7 +150,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 756 - job_id: jp3jxmj3g + job_id: j5mn0j4wp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,13 +159,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:35:56Z' + timestamp: '2024-12-12T00:04:35Z' - torchscript_onnx_qnn: - inference_time: 15514257.0 - throughput: 0.06445684121385897 + inference_time: 15568676.0 + throughput: 0.06423153773641381 estimated_peak_memory_range: - min: 26796032 - max: 28091512 + min: 26263552 + max: 27483912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -173,7 +173,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 773 - job_id: jgn6oe6k5 + job_id: jgjvrjkxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -182,13 +182,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:35:44Z' + timestamp: '2024-12-12T00:04:23Z' - torchscript_onnx_qnn: - inference_time: 21605057.0 - throughput: 0.04628545992727536 + inference_time: 21592096.0 + throughput: 0.04631324351281135 estimated_peak_memory_range: - min: 25182208 - max: 274158288 + min: 22441984 + max: 274662544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 773 - job_id: j5q6z26np + job_id: j57yeq6l5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -205,10 +205,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: 
'2024-11-26T02:35:53Z' + timestamp: '2024-12-12T00:04:31Z' - torchscript_onnx_qnn: - inference_time: 16302502.0 - throughput: 0.06134027770706607 + inference_time: 16297865.0 + throughput: 0.06135772998487839 estimated_peak_memory_range: min: 25214976 max: 25214976 @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 773 - job_id: jprvoyv0g + job_id: jped7j415 job_status: Passed torchscript_onnx: - inference_time: 3022030.0 - throughput: 0.33090339937062174 + inference_time: 2612220.0 + throughput: 0.38281614871641745 estimated_peak_memory_range: - min: 158101504 - max: 158101504 + min: 158437376 + max: 158437376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 756 - job_id: jgo2ov2qp + job_id: jgn6zjxr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -243,4 +243,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:35:58Z' + timestamp: '2024-12-12T00:04:36Z' diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index d5b17f2e..f62c8177 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ESRGAN universal_assets: - torchscript_onnx_tflite: mm6v4vl5q - torchscript_onnx: mqy3w35rm + torchscript_onnx_tflite: mn1wzrrvm + torchscript_onnx: mm5ed1ykm performance_metrics: - torchscript_onnx_tflite: - inference_time: 69024.0 - throughput: 14.48771441817339 + inference_time: 64319.0 + throughput: 15.547505402758128 estimated_peak_memory_range: - min: 3203072 - max: 42797000 + min: 36864 + max: 116428144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jg9lk4nlg + job_id: jpy1o947p job_status: Passed torchscript_onnx_qnn: - inference_time: 66742.0 - throughput: 
14.983069131880974 + inference_time: 68378.0 + throughput: 14.624586855421335 estimated_peak_memory_range: - min: 16384 - max: 40215552 + min: 53248 + max: 38498768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jpy1qd37p + job_id: jgjvrj0xg job_status: Passed torchscript_onnx: - inference_time: 71631.0 - throughput: 13.960436124024515 + inference_time: 70306.0 + throughput: 14.223537109208317 estimated_peak_memory_range: - min: 90112 - max: 44333728 + min: 172032 + max: 44638656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jped8vm15 + job_id: jprvlz19g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:34:47Z' + timestamp: '2024-12-12T00:03:27Z' - torchscript_onnx_tflite: - inference_time: 48914.0 - throughput: 20.444044649793515 + inference_time: 51252.0 + throughput: 19.511433700148288 estimated_peak_memory_range: - min: 3407872 - max: 693788464 + min: 3194880 + max: 116706064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp14d13lp + job_id: jp0zmn165 job_status: Passed torchscript_onnx_qnn: - inference_time: 50107.0 - throughput: 19.95729139641168 + inference_time: 50391.0 + throughput: 19.84481355797662 estimated_peak_memory_range: - min: 69632 - max: 111985824 + min: 73728 + max: 112313936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jp0zdr065 + job_id: jped7jr15 job_status: Passed torchscript_onnx: - inference_time: 49647.0 - throughput: 20.1422039599573 + inference_time: 50782.0 + throughput: 19.69201685636643 
estimated_peak_memory_range: - min: 7036928 - max: 730697376 + min: 6434816 + max: 730279536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgz387dk5 + job_id: jp2kr234p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:34:48Z' + timestamp: '2024-12-12T00:03:28Z' - torchscript_onnx_tflite: - inference_time: 37457.0 - throughput: 26.69727954721414 + inference_time: 38648.0 + throughput: 25.874560132477747 estimated_peak_memory_range: - min: 3158016 - max: 140960032 + min: 5255168 + max: 142499408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgdx8v1ep + job_id: jp8qel3xp job_status: Passed torchscript_onnx_qnn: - inference_time: 42368.0 - throughput: 23.602719033232628 + inference_time: 42452.0 + throughput: 23.55601620653915 estimated_peak_memory_range: min: 0 - max: 136134240 + max: 134929920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jp8q67yxp + job_id: jgz3l1xk5 job_status: Passed torchscript_onnx: - inference_time: 43233.0 - throughput: 23.13047903222076 + inference_time: 38208.0 + throughput: 26.172529313232832 estimated_peak_memory_range: - min: 8192 - max: 186501952 + min: 7548928 + max: 194206960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j5we89e65 + job_id: jpy1o9v7p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:34:49Z' + timestamp: '2024-12-12T00:03:29Z' - 
torchscript_onnx_tflite: - inference_time: 69792.0 - throughput: 14.328289775332417 + inference_time: 67862.0 + throughput: 14.735787333117209 estimated_peak_memory_range: - min: 3248128 - max: 44863168 + min: 110592 + max: 123790952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: j57ykdrl5 + job_id: jgke2jl2g job_status: Passed torchscript_onnx_qnn: - inference_time: 63641.0 - throughput: 15.713140899734448 + inference_time: 63227.0 + throughput: 15.816027962737438 estimated_peak_memory_range: - min: 397312 - max: 1744960 + min: 512000 + max: 1661696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jgkeoyx2g + job_id: j5weljm65 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:34:37Z' + timestamp: '2024-12-12T00:03:18Z' - torchscript_onnx_tflite: - inference_time: 3548908.0 - throughput: 0.28177681698144896 + inference_time: 3548975.0 + throughput: 0.28177149740418006 estimated_peak_memory_range: - min: 0 - max: 136943552 + min: 49152 + max: 136853808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp4lmwrv5 + job_id: j5q6lj74p job_status: Passed torchscript_onnx_qnn: - inference_time: 3549141.0 - throughput: 0.28175831842127436 + inference_time: 3549260.0 + throughput: 0.2817488715957693 estimated_peak_memory_range: - min: 835584 - max: 6382528 + min: 663552 + max: 11339696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jglvokm85 + job_id: jp14nrq2p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:34:40Z' + timestamp: '2024-12-12T00:03:21Z' - torchscript_onnx_tflite: - inference_time: 69900.0 - throughput: 14.30615164520744 + inference_time: 68571.0 + throughput: 14.583424479736331 estimated_peak_memory_range: - min: 81920 - max: 230831048 + min: 3252224 + max: 44035280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jpxk31o15 + job_id: jglvyj085 job_status: Passed torchscript_onnx_qnn: - inference_time: 63964.0 - throughput: 15.633794009130135 + inference_time: 62687.0 + throughput: 15.952270805749198 estimated_peak_memory_range: - min: 356352 - max: 1697464 + min: 434176 + max: 1581992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j56yr140p + job_id: j57yeqvl5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:34:41Z' + timestamp: '2024-12-12T00:03:22Z' - torchscript_onnx_tflite: - inference_time: 112147.0 - throughput: 8.916868039269886 + inference_time: 112178.0 + throughput: 8.91440389381162 estimated_peak_memory_range: - min: 3182592 - max: 131632608 + min: 3235840 + max: 132232208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: j5mnozxwp + job_id: j56y8k30p job_status: Passed torchscript_onnx_qnn: - inference_time: 111111.0 - throughput: 9.000009000009 + inference_time: 111087.0 + throughput: 9.001953423892985 estimated_peak_memory_range: - min: 417792 - max: 6241328 + min: 2764800 + max: 9227536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jp3jxm0lg + 
job_id: jp4lyzjv5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:34:42Z' + timestamp: '2024-12-12T00:03:23Z' - torchscript_onnx_tflite: - inference_time: 66467.0 - throughput: 15.04505995456392 + inference_time: 69239.0 + throughput: 14.442727364635537 estimated_peak_memory_range: min: 102400 - max: 122926048 + max: 223829216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jgn6oevr5 + job_id: jp3jzy4lg job_status: Passed torchscript_onnx_qnn: - inference_time: 63164.0 - throughput: 15.83180292571718 + inference_time: 63532.0 + throughput: 15.740099477428698 estimated_peak_memory_range: - min: 372736 - max: 1957696 + min: 405504 + max: 1740208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jgo2ov6xp + job_id: jpxklwe15 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:34:44Z' + timestamp: '2024-12-12T00:03:24Z' - torchscript_onnx_tflite: - inference_time: 131599.0 - throughput: 7.5988419364888795 + inference_time: 131596.0 + throughput: 7.599015167634275 estimated_peak_memory_range: - min: 3182592 - max: 142428576 + min: 3260416 + max: 142384032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jprvoy39g + job_id: jgo2lj1xp job_status: Passed torchscript_onnx_qnn: - inference_time: 131068.0 - throughput: 7.629627368999298 + inference_time: 131035.0 + throughput: 7.631548822833594 estimated_peak_memory_range: - min: 438272 - max: 6263104 + min: 421888 + max: 6714384 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jpv6ewkj5 + job_id: j5mn0jvwp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:34:45Z' + timestamp: '2024-12-12T00:03:25Z' - torchscript_onnx_tflite: - inference_time: 133506.0 - throughput: 7.49030006142046 + inference_time: 134344.0 + throughput: 7.443577681176681 estimated_peak_memory_range: - min: 3284992 - max: 110913136 + min: 3244032 + max: 112096640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jp2k4my4p + job_id: jpv6lj1j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 128638.0 - throughput: 7.773752701379064 + inference_time: 122541.0 + throughput: 8.16053402534662 estimated_peak_memory_range: - min: 290816 - max: 91639056 + min: 315392 + max: 91412576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jgjvolnxg + job_id: jgn6zjrr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:34:46Z' + timestamp: '2024-12-12T00:03:26Z' - torchscript_onnx_qnn: - inference_time: 64855.0 - throughput: 15.419011641353789 + inference_time: 64934.0 + throughput: 15.400252564142052 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j5q6z2q4p + job_id: jg9lz69lg job_status: Passed torchscript_onnx: - inference_time: 65698.0 - throughput: 15.221163505738378 + inference_time: 65674.0 + throughput: 15.226725949386363 estimated_peak_memory_range: - min: 
39751680 - max: 39751680 + min: 41005056 + max: 41005056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jg9lk4llg + job_id: jp0zmne65 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:34:51Z' + timestamp: '2024-12-12T00:03:30Z' diff --git a/qai_hub_models/models/face_attrib_net/README.md b/qai_hub_models/models/face_attrib_net/README.md index 78897d1d..9771c0eb 100644 --- a/qai_hub_models/models/face_attrib_net/README.md +++ b/qai_hub_models/models/face_attrib_net/README.md @@ -1,11 +1,11 @@ [![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) -# [FaceAttribNet: Comprehensive facial analysis by extracting face features](https://aihub.qualcomm.com/models/face_attrib_net) +# [Facial-Attribute-Detection: Comprehensive facial analysis by extracting face features](https://aihub.qualcomm.com/models/face_attrib_net) Facial feature extraction and additional attributes including liveness, eyeclose, mask and glasses detection for face recognition. -This is based on the implementation of FaceAttribNet found [here](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_attrib_net/model.py). This repository contains scripts for optimized on-device +This is based on the implementation of Facial-Attribute-Detection found [here](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_attrib_net/model.py). This repository contains scripts for optimized on-device export suitable to run on Qualcomm® devices. More details on model performance accross various devices, can be found [here](https://aihub.qualcomm.com/models/face_attrib_net). @@ -40,7 +40,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License -* The license for the original implementation of FaceAttribNet can be found +* The license for the original implementation of Facial-Attribute-Detection can be found [here](https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE). * The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) diff --git a/qai_hub_models/models/face_attrib_net/__init__.py b/qai_hub_models/models/face_attrib_net/__init__.py index 6536b8ca..c50a626a 100644 --- a/qai_hub_models/models/face_attrib_net/__init__.py +++ b/qai_hub_models/models/face_attrib_net/__init__.py @@ -2,6 +2,5 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import FaceAttribNetApp as App # noqa: F401 from .model import MODEL_ID # noqa: F401 from .model import FaceAttribNet as Model # noqa: F401 diff --git a/qai_hub_models/models/face_attrib_net/demo.py b/qai_hub_models/models/face_attrib_net/demo.py index ebea066d..eca326a0 100644 --- a/qai_hub_models/models/face_attrib_net/demo.py +++ b/qai_hub_models/models/face_attrib_net/demo.py @@ -2,61 +2,14 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import json -from pathlib import Path - -from qai_hub_models.models.face_attrib_net.app import FaceAttribNetApp -from qai_hub_models.models.face_attrib_net.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - OUT_NAMES, - FaceAttribNet, -) -from qai_hub_models.utils.args import ( - demo_model_from_cli_args, - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image - -INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "img_sample.bmp" +from qai_hub_models.models._shared.face_attrib_net.demo import ( + face_attrib_net_demo as demo_main, ) +from qai_hub_models.models.face_attrib_net.model import FaceAttribNet -# Run FaceAttribNet end-to-end on a sample image. -def main(): - # Demo parameters - parser = get_model_cli_parser(FaceAttribNet) - parser = get_on_device_demo_parser(parser, add_output_dir=True) - parser.add_argument( - "--image", - type=str, - default=INPUT_IMAGE_ADDRESS, - help="image file path or URL", - ) - args = parser.parse_args([]) - model = demo_model_from_cli_args(FaceAttribNet, MODEL_ID, args) - validate_on_device_demo_args(args, MODEL_ID) - - # Load image - _, _, height, width = FaceAttribNet.get_input_spec()["image"][0] - orig_image = load_image(args.image) - print("Model loaded") - - app = FaceAttribNetApp(model) - output = app.run_inference_on_image(orig_image) - out_dict = {} - for i in range(len(output)): - out_dict[OUT_NAMES[i]] = list(output[i].astype(float)) - - output_path = ( - args.output_dir or str(Path() / "build") - ) + "/FaceAttribNet_output.json" - with open(output_path, "w", encoding="utf-8") as wf: - json.dump(out_dict, wf, ensure_ascii=False, indent=4) - print(f"Model outputs are saved at: {output_path}") +def main(is_test: bool = False): + demo_main(FaceAttribNet, 
is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/face_attrib_net/export.py b/qai_hub_models/models/face_attrib_net/export.py index 6c8d4b92..e9349bfa 100644 --- a/qai_hub_models/models/face_attrib_net/export.py +++ b/qai_hub_models/models/face_attrib_net/export.py @@ -94,7 +94,7 @@ def export_model( if not can_access_qualcomm_ai_hub(): return export_without_hub_access( "face_attrib_net", - "FaceAttribNet", + "Facial-Attribute-Detection", device or f"Device (Chipset {chipset})", skip_profiling, skip_inferencing, diff --git a/qai_hub_models/models/face_attrib_net/info.yaml b/qai_hub_models/models/face_attrib_net/info.yaml index b7735d8e..ba09c88c 100644 --- a/qai_hub_models/models/face_attrib_net/info.yaml +++ b/qai_hub_models/models/face_attrib_net/info.yaml @@ -1,4 +1,4 @@ -name: FaceAttribNet +name: Facial-Attribute-Detection # id must match with the model dir name in qai_hub_models id: face_attrib_net status: public @@ -14,6 +14,7 @@ source_repo: https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/mode technical_details: Model checkpoint: multitask_FR_state_dict.pt Input resolution: 128x128 + Input channel number: 1 Number of parameters: 11.6M Model size: 47.6MB applicable_scenarios: diff --git a/qai_hub_models/models/face_attrib_net/model.py b/qai_hub_models/models/face_attrib_net/model.py index 4b147b31..28e89f29 100644 --- a/qai_hub_models/models/face_attrib_net/model.py +++ b/qai_hub_models/models/face_attrib_net/model.py @@ -7,6 +7,7 @@ import torch import torch.nn as nn +from qai_hub_models.models._shared.face_attrib_net.model import FaceNet from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -55,7 +56,7 @@ def from_pretrained(cls, checkpoint_path: str | None = None): faceattribnet_model.to(torch.device("cpu")) return cls(faceattribnet_model) - def forward(self, image: torch.Tensor): + def 
forward(self, image): """ Run FaceAttribNet on cropped and pre-processed 128x128 face `image`, and produce various attributes. @@ -84,639 +85,3 @@ def get_input_spec( @staticmethod def get_output_names() -> list[str]: return OUT_NAMES - - -class FaceNet(nn.Module): - def __init__( - self, - chan, - blks_per_layer, - fea_only=True, - liveness=True, - openness=True, - glasses=True, - mask=True, - sunglasses=True, - group_size=32, - activ_type="prelu", - ): - super().__init__() - - self.head_converter = nn.Conv2d( - 3, 1, 1, stride=1, padding=0, groups=1, bias=False - ) - self.chan = chan - self.head = HeadBlock(chan) - self.blks_per_layer = blks_per_layer - self.fea_only = fea_only - - self.main_module = nn.ModuleList() - for i in range(len(self.blks_per_layer)): - self.main_module.append(self._make_net(self.chan, self.blks_per_layer[i])) - self.chan *= 2 - - self.embed = EmbedBlock(self.chan) - - self.liveness = liveness - if self.liveness: - self.base_chan = chan - self.liveness_bran1_gconv = Conv2dBlock( - self.base_chan * 4, - self.base_chan * 2, - 3, - padding=1, - stride=2, - group=self.base_chan * 4 // group_size, - norm="bn", - activ="none", - ) - self.liveness_bran1_conv = Conv2dBlock( - self.base_chan * 2, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.liveness_bran2_gconv = Conv2dBlock( - self.base_chan * 10, - self.base_chan * 5, - 3, - padding=1, - stride=2, - group=self.base_chan * 10 // group_size, - norm="bn", - activ="none", - ) - self.liveness_bran2_conv = Conv2dBlock( - self.base_chan * 5, - self.base_chan, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.liveness_fc = nn.Linear(self.base_chan * 4 * 4, self.base_chan // 2) - - self.openness = openness - if self.openness: - self.base_chan = chan - self.openness_bran1_gconv = Conv2dBlock( - self.base_chan * 4, - self.base_chan * 2, - 3, - padding=1, - stride=2, - group=self.base_chan * 4 // group_size, - 
norm="none", - activ="none", - ) - self.openness_bran1_conv = Conv2dBlock( - self.base_chan * 2, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.openness_bran2_gconv = Conv2dBlock( - self.base_chan * 10, - self.base_chan * 5, - 3, - padding=1, - stride=2, - group=self.base_chan * 10 // group_size, - norm="none", - activ="none", - ) - self.openness_bran2_conv = Conv2dBlock( - self.base_chan * 5, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.openness_ave = nn.AvgPool2d(kernel_size=4, stride=1) - self.openness_cls = nn.Linear(self.base_chan * 2, 2) - - self.glasses = glasses - if self.glasses: - self.base_chan = chan - self.eyeglasses_bran1_gconv = Conv2dBlock( - self.base_chan * 4, - self.base_chan * 2, - 3, - padding=1, - stride=2, - group=self.base_chan * 4 // group_size, - norm="none", - activ="none", - ) - self.eyeglasses_bran1_conv = Conv2dBlock( - self.base_chan * 2, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.eyeglasses_bran2_gconv = Conv2dBlock( - self.base_chan * 10, - self.base_chan * 5, - 3, - padding=1, - stride=2, - group=self.base_chan * 10 // group_size, - norm="none", - activ="none", - ) - self.eyeglasses_bran2_conv = Conv2dBlock( - self.base_chan * 5, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.eyeglasses_ave = nn.AvgPool2d(kernel_size=4, stride=1) - self.eyeglasses_cls = nn.Linear(self.base_chan * 2, 2) - - self.sunglasses = sunglasses - if self.sunglasses: - self.base_chan = chan - self.sunglasses_bran1_gconv = Conv2dBlock( - self.base_chan * 4, - self.base_chan * 2, - 3, - padding=1, - stride=2, - group=self.base_chan * 4 // group_size, - norm="none", - activ="none", - ) - self.sunglasses_bran1_conv = Conv2dBlock( - self.base_chan * 2, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - 
norm="bn", - activ=activ_type, - ) - self.sunglasses_bran2_gconv = Conv2dBlock( - self.base_chan * 10, - self.base_chan * 5, - 3, - padding=1, - stride=2, - group=self.base_chan * 10 // group_size, - norm="none", - activ="none", - ) - self.sunglasses_bran2_conv = Conv2dBlock( - self.base_chan * 5, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.sunglasses_ave = nn.AvgPool2d(kernel_size=4, stride=1) - self.sunglasses_cls = nn.Linear(self.base_chan * 2, 2) - self.sunglasses_softmax = nn.Softmax(dim=1) - - self.mask = mask - if self.mask: - self.base_chan = chan - self.mask_bran1_gconv = Conv2dBlock( - self.base_chan * 4, - self.base_chan * 2, - 3, - padding=1, - stride=2, - group=self.base_chan * 4 // group_size, - norm="none", - activ="none", - ) - self.mask_bran1_conv = Conv2dBlock( - self.base_chan * 2, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.mask_bran2_gconv = Conv2dBlock( - self.base_chan * 10, - self.base_chan * 5, - 3, - padding=1, - stride=2, - group=self.base_chan * 10 // group_size, - norm="none", - activ="none", - ) - self.mask_bran2_conv = Conv2dBlock( - self.base_chan * 5, - self.base_chan * 2, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ) - self.mask_ave = nn.AvgPool2d(kernel_size=4, stride=1) - self.mask_cls = nn.Linear(self.base_chan * 2, 2) - - def _make_net(self, chan, n): - cnn_x = [] - cnn_x += [DownsampleBlock(chan)] - for i in range(n - 1): - cnn_x += [NormalBlock(2 * chan)] - cnn_x = nn.Sequential(*cnn_x) - return cnn_x - - def forward(self, x, target=None): - x = self.head_converter(x) - fea1 = self.head(x) - - fea2 = self.main_module[0](fea1) - fea3 = self.main_module[1](fea2) - fea4 = self.main_module[2](fea3) - - fr_out = self.embed(fea4) - - outputs = [] - outputs.append(fr_out) - - if self.liveness: - a = self.liveness_bran1_conv(self.liveness_bran1_gconv(fea3)) - a = torch.cat((a, 
fea4), dim=1) - a = self.liveness_bran2_conv(self.liveness_bran2_gconv(a)) - a = a.flatten(start_dim=1) - a = self.liveness_fc(a) - outputs.append(a) - - if self.openness: - a = self.openness_bran1_conv(self.openness_bran1_gconv(fea3)) - a = torch.cat((a, fea4), dim=1) - a = self.openness_ave( - self.openness_bran2_conv(self.openness_bran2_gconv(a)) - ) - a = a.flatten(start_dim=1) - a = self.openness_cls(a) - outputs.append(a) - - if self.glasses: - a = self.eyeglasses_bran1_gconv(fea3) - a = self.eyeglasses_bran1_conv(a) - a = torch.cat((a, fea4), dim=1) - a = self.eyeglasses_ave( - self.eyeglasses_bran2_conv(self.eyeglasses_bran2_gconv(a)) - ) - a = a.flatten(start_dim=1) - a = self.eyeglasses_cls(a) - outputs.append(a) - - if self.mask: - a = self.mask_bran1_gconv(fea3) - a = self.mask_bran1_conv(a) - a = torch.cat((a, fea4), dim=1) - a = self.mask_ave(self.mask_bran2_conv(self.mask_bran2_gconv(a))) - a = a.flatten(start_dim=1) - a = self.mask_cls(a) - outputs.append(a) - - if self.sunglasses: - a = self.sunglasses_bran1_conv(self.sunglasses_bran1_gconv(fea3)) - a = torch.cat((a, fea4), dim=1) - a = self.sunglasses_ave( - self.sunglasses_bran2_conv(self.sunglasses_bran2_gconv(a)) - ) - a = a.flatten(start_dim=1) - a = self.sunglasses_cls(a) - a = self.sunglasses_softmax(a) - outputs.append(a) - - return outputs - - -########################################### -# HeadBlock, DownsampleBlock, NormalBlock, and EmbedBlock -########################################## -# HeadBlock -class HeadBlock(nn.Module): - def __init__(self, chan, group_size=32, activ_type="prelu"): - super().__init__() - self.conv = Conv2dBlock( - 1, chan, 3, padding=1, stride=1, group=1, norm="bn", activ=activ_type - ) - self.bran1 = [ - Conv2dBlock( - chan, - chan, - 3, - padding=1, - stride=2, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ=activ_type - ), - Conv2dBlock( - chan, - chan, - 3, - 
padding=1, - stride=1, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none" - ), - ] - - self.bran1 = nn.Sequential(*self.bran1) - - self.bran2 = [ - Conv2dBlock( - chan, - chan, - 3, - padding=1, - stride=2, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none" - ), - ] - self.bran2 = nn.Sequential(*self.bran2) - - if activ_type == "prelu": - self.activ = nn.PReLU() - elif activ_type == "relu": - self.activ = nn.ReLU() - else: - self.activ = None - assert 0, f"Unsupported activation function: {activ_type}" - - def forward(self, x): - x = self.conv(x) - x = self.bran1(x) + self.bran2(x) - if self.activ: - x = self.activ(x) - return x - - -# DownsampleBlock -class DownsampleBlock(nn.Module): - def __init__(self, chan, group_size=32, activ_type="prelu"): - super().__init__() - assert ( - chan % group_size == 0 - ), f"chan {chan:d} cannot be divided by group_size {group_size:d}" - - self.bran1 = nn.Sequential( - Conv2dBlock( - chan, - 2 * chan, - 3, - padding=1, - stride=2, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - 2 * chan, - 2 * chan, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ), - ) - - self.bran2 = [ - Conv2dBlock( - chan, - 4 * chan, - 3, - padding=1, - stride=2, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - 4 * chan, - 2 * chan, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ=activ_type, - ), - Conv2dBlock( - 2 * chan, - 4 * chan, - 3, - padding=1, - stride=1, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - 4 * chan, - 2 * chan, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ="none", - ), - ] - - self.bran2 = nn.Sequential(*self.bran2) - - if activ_type == "prelu": - self.activ = nn.PReLU() - elif activ_type 
== "relu": - self.activ = nn.ReLU() - else: - self.activ = None - assert 0, f"Unsupported activation function: {activ_type}" - - def forward(self, x): - x = self.bran1(x) + self.bran2(x) - if self.activ: - x = self.activ(x) - return x - - -# NormalBlock -class NormalBlock(nn.Module): - def __init__(self, chan, group_size=32, activ_type="prelu"): - super().__init__() - assert ( - chan % group_size == 0 - ), f"chan {chan:d} cannot be divided by group_size {group_size:d}" - self.model = [ - Conv2dBlock( - chan, - 2 * chan, - 3, - padding=1, - stride=1, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - 2 * chan, - chan, - 1, - padding=0, - stride=1, - group=1, - norm="bn", - activ="prelu", - ), - Conv2dBlock( - chan, - 2 * chan, - 3, - padding=1, - stride=1, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - 2 * chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ="none" - ), - ] - - self.model = nn.Sequential(*self.model) - - if activ_type == "prelu": - self.activ = nn.PReLU() - elif activ_type == "relu": - self.activ = nn.ReLU() - else: - self.activ = None - assert 0, f"Unsupported activation function: {activ_type}" - - def forward(self, x): - x = self.model(x) + x - if self.activ: - x = self.activ(x) - return x - - -class EmbedBlock(nn.Module): - def __init__(self, chan, group_size=32, activ_type="prelu"): - super().__init__() - self.model = nn.Sequential( - Conv2dBlock( - chan, - chan, - 8, - padding=0, - stride=1, - group=chan // group_size, - norm="none", - activ="none", - ), - Conv2dBlock( - chan, chan, 1, padding=0, stride=1, group=1, norm="bn", activ=activ_type - ), - ) - - def forward(self, x): - x = self.model(x) - return x.flatten(start_dim=1) - - -########################################### -# Basic Blocks -########################################## -class Conv2dBlock(nn.Module): - def __init__( - self, - in_chan, - out_chan, - kernel_size, - padding=0, - stride=1, - group=1, - 
norm="none", - activ="none", - ): - super().__init__() - self.conv = nn.Conv2d( - in_chan, - out_chan, - kernel_size, - stride=stride, - padding=padding, - groups=group, - bias=False, - ) - - if norm == "bn": - self.norm = nn.BatchNorm2d(out_chan) - elif norm == "none": - self.norm = None - else: - assert 0, f"Unsupported normalization: {norm}" - - if activ == "prelu": - self.activ = nn.PReLU() - elif activ == "relu": - self.activ = nn.ReLU() - elif activ == "sigmoid": - self.activ = nn.Sigmoid() - elif activ == "none": - self.activ = None - else: - assert 0, f"Unsupported activation layer: {activ}" - - def forward(self, x): - x = self.conv(x) - if self.norm: - x = self.norm(x) - if self.activ: - x = self.activ(x) - return x diff --git a/qai_hub_models/models/face_attrib_net/perf.yaml b/qai_hub_models/models/face_attrib_net/perf.yaml index 2d2800e3..a9993ea1 100644 --- a/qai_hub_models/models/face_attrib_net/perf.yaml +++ b/qai_hub_models/models/face_attrib_net/perf.yaml @@ -42,17 +42,17 @@ aggregated: - SA8295P - SA8650P Proxy models: -- name: FaceAttribNet +- name: Facial-Attribute-Detection universal_assets: - torchscript_onnx_tflite: mqpzyz40n - torchscript_onnx: mno303wvn + torchscript_onnx_tflite: mqp3z71vm + torchscript_onnx: mmd3y2evn performance_metrics: - torchscript_onnx_tflite: - inference_time: 958.0 - throughput: 1043.8413361169103 + inference_time: 982.0 + throughput: 1018.3299389002036 estimated_peak_memory_range: - min: 24576 - max: 111550104 + min: 20480 + max: 111407904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: j5q6z3k4p + job_id: jgn6zjem5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1035.0 - throughput: 966.1835748792271 + inference_time: 1024.0 + throughput: 976.5625 estimated_peak_memory_range: - min: 172032 - max: 69236952 + min: 16384 + max: 79472456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgdx8olep + job_id: jgo2ljvdp job_status: Passed torchscript_onnx: - inference_time: 1221.0 - throughput: 819.000819000819 + inference_time: 1210.0 + throughput: 826.4462809917355 estimated_peak_memory_range: - min: 20480 - max: 31320672 + min: 12288 + max: 30957968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: j5q6z3o4p + job_id: jp14nrd2p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:33:45Z' + timestamp: '2024-12-12T00:02:29Z' - torchscript_onnx_tflite: - inference_time: 728.0 - throughput: 1373.6263736263736 + inference_time: 731.0 + throughput: 1367.9890560875513 estimated_peak_memory_range: min: 16384 - max: 22353648 + max: 26119568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jglvo3z85 + job_id: jprvlzyeg job_status: Passed torchscript_onnx_qnn: - inference_time: 778.0 - throughput: 1285.3470437017995 + inference_time: 785.0 + throughput: 1273.8853503184714 estimated_peak_memory_range: - min: 0 - max: 24407104 + min: 212992 + max: 25869184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j57ykx3l5 + job_id: jpv6ljwm5 job_status: Passed torchscript_onnx: - inference_time: 935.0 - throughput: 1069.51871657754 + inference_time: 946.0 + throughput: 1057.0824524312895 estimated_peak_memory_range: min: 0 - max: 119075104 + max: 120245744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: jglvo3r85 + job_id: jgdxdjrep job_status: Passed reference_device_info: name: Samsung 
Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:33:46Z' + timestamp: '2024-12-12T00:02:30Z' - torchscript_onnx_tflite: - inference_time: 658.0 - throughput: 1519.756838905775 + inference_time: 677.0 + throughput: 1477.1048744460857 estimated_peak_memory_range: min: 12288 - max: 62457600 + max: 25060976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jgz3xyv65 + job_id: jpy1o9d4p job_status: Passed torchscript_onnx_qnn: - inference_time: 651.0 - throughput: 1536.0983102918588 + inference_time: 738.0 + throughput: 1355.0135501355014 estimated_peak_memory_range: min: 0 - max: 21793840 + max: 24391872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp4lmv0v5 + job_id: jgjvrjl8g job_status: Passed torchscript_onnx: - inference_time: 866.0 - throughput: 1154.7344110854503 + inference_time: 860.0 + throughput: 1162.7906976744187 estimated_peak_memory_range: min: 0 - max: 65067200 + max: 66718320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: j56yrnl0p + job_id: j57yeqjl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:33:47Z' + timestamp: '2024-12-12T00:02:31Z' - torchscript_onnx_tflite: - inference_time: 949.0 - throughput: 1053.740779768177 + inference_time: 950.0 + throughput: 1052.6315789473683 estimated_peak_memory_range: min: 16384 - max: 111431632 + max: 111576488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jp3jxe3lg + job_id: jp0zmnre5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 984.0 - throughput: 1016.260162601626 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 233472 - max: 1404176 + min: 241664 + max: 1465096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpxk3y215 + job_id: jped7jv05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:33:35Z' - - torchscript_onnx_qnn: - inference_time: 30142.0 - throughput: 33.17629885210006 + timestamp: '2024-12-12T00:02:20Z' + - torchscript_onnx_tflite: + inference_time: 29939.0 + throughput: 33.40124920672033 estimated_peak_memory_range: - min: 249856 - max: 5917168 + min: 53248 + max: 22743344 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 161 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 161 + job_id: jp8qel78p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 30127.0 + throughput: 33.192817074385104 + estimated_peak_memory_range: + min: 319488 + max: 10795104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgn6o38r5 + job_id: j5welj9j5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:33:38Z' + timestamp: '2024-12-12T00:02:22Z' - torchscript_onnx_tflite: - inference_time: 967.0 - throughput: 1034.126163391934 + inference_time: 971.0 + throughput: 1029.8661174047375 estimated_peak_memory_range: - min: 16384 - max: 111469344 + min: 0 + max: 111429912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jpv6evoj5 
+ job_id: jgke2jyog job_status: Passed torchscript_onnx_qnn: - inference_time: 1006.0 - throughput: 994.0357852882704 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: - min: 241664 - max: 1494656 + min: 270336 + max: 1984160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jprvoej9g + job_id: jg9lz64vg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:33:39Z' + timestamp: '2024-12-12T00:02:23Z' - torchscript_onnx_tflite: - inference_time: 1666.0 - throughput: 600.2400960384153 + inference_time: 1672.0 + throughput: 598.0861244019138 estimated_peak_memory_range: min: 16384 - max: 17670976 + max: 21336384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jgjvoemxg + job_id: j5q6lj2mp job_status: Passed torchscript_onnx_qnn: - inference_time: 1902.0 - throughput: 525.7623554153523 + inference_time: 1738.0 + throughput: 575.3739930955121 estimated_peak_memory_range: - min: 212992 - max: 6232064 + min: 0 + max: 5834144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp2k4ln4p + job_id: jp14nr8lp job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:33:40Z' + timestamp: '2024-12-12T00:02:24Z' - torchscript_onnx_tflite: - inference_time: 958.0 - throughput: 1043.8413361169103 + inference_time: 969.0 + throughput: 1031.9917440660474 estimated_peak_memory_range: - min: 20480 - max: 110964384 + min: 16384 + max: 111183808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jgz38r9k5 + job_id: jglvyjkl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: - min: 229376 - max: 1465728 + min: 233472 + max: 1534360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpy1q607p + job_id: jgdxdjvlp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:33:41Z' + timestamp: '2024-12-12T00:02:25Z' - torchscript_onnx_tflite: - inference_time: 1779.0 - throughput: 562.1135469364812 + inference_time: 1782.0 + throughput: 561.1672278338945 estimated_peak_memory_range: - min: 16384 - max: 21405472 + min: 0 + max: 24947632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: j5we8qn65 + job_id: j56y8k17p job_status: Passed torchscript_onnx_qnn: - inference_time: 1958.0 - throughput: 510.7252298263534 + inference_time: 1990.0 + throughput: 502.51256281407035 estimated_peak_memory_range: - min: 0 - max: 5701392 + min: 217088 + max: 6369552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp0zdl765 + job_id: j5weljd65 job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:33:42Z' + timestamp: '2024-12-12T00:02:27Z' - torchscript_onnx_tflite: - inference_time: 1276.0 - throughput: 783.6990595611285 + inference_time: 1277.0 + throughput: 783.0853563038371 estimated_peak_memory_range: - min: 36864 - max: 23061312 + min: 16384 + max: 23776304 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 161 - job_id: jg9lkwelg + job_id: jp3jzymzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1340.0 - throughput: 746.2686567164179 + inference_time: 1357.0 + throughput: 736.9196757553427 estimated_peak_memory_range: - min: 204800 - max: 22860480 + min: 212992 + max: 23333456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgkeo3m2g + job_id: jg9lz63lg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:33:44Z' + timestamp: '2024-12-12T00:02:28Z' - torchscript_onnx_qnn: - inference_time: 1120.0 - throughput: 892.8571428571429 + inference_time: 1161.0 + throughput: 861.3264427217915 estimated_peak_memory_range: - min: 421888 - max: 421888 + min: 438272 + max: 438272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j5mno3ywp + job_id: jgz3l1765 job_status: Passed torchscript_onnx: - inference_time: 1241.0 - throughput: 805.8017727639001 + inference_time: 1180.0 + throughput: 847.457627118644 estimated_peak_memory_range: - min: 27967488 - max: 27967488 + min: 28200960 + max: 28200960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 163 - job_id: jp3jxe2lg + job_id: jp4lyzxv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:33:48Z' + timestamp: '2024-12-12T00:02:32Z' diff --git a/qai_hub_models/models/face_attrib_net/test.py b/qai_hub_models/models/face_attrib_net/test.py index 
d2df2fcc..78d231cc 100644 --- a/qai_hub_models/models/face_attrib_net/test.py +++ b/qai_hub_models/models/face_attrib_net/test.py @@ -2,9 +2,11 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models.face_attrib_net.app import FaceAttribNetApp -from qai_hub_models.models.face_attrib_net.demo import INPUT_IMAGE_ADDRESS -from qai_hub_models.models.face_attrib_net.demo import main as demo_main +from qai_hub_models.models._shared.face_attrib_net.app import FaceAttribNetApp +from qai_hub_models.models._shared.face_attrib_net.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models._shared.face_attrib_net.demo import ( + face_attrib_net_demo as demo_main, +) from qai_hub_models.models.face_attrib_net.model import ( MODEL_ASSET_VERSION, MODEL_ID, @@ -43,4 +45,4 @@ def test_task(): def test_demo(): - demo_main() + demo_main(FaceAttribNet) diff --git a/qai_hub_models/models/face_attrib_net_quantized/README.md b/qai_hub_models/models/face_attrib_net_quantized/README.md new file mode 100644 index 00000000..c80010e0 --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/README.md @@ -0,0 +1,58 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Facial-Attribute-Detection-Quantized: Comprehensive facial analysis by extracting face features](https://aihub.qualcomm.com/models/face_attrib_net_quantized) + +Facial feature extraction and additional attributes including liveness, eyeclose, mask and glasses detection for face recognition. + +This is based on the implementation of Facial-Attribute-Detection-Quantized found [here](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_attrib_net/model.py). 
This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/face_attrib_net_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.face_attrib_net_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.face_attrib_net_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + + +## License +* The license for the original implementation of Facial-Attribute-Detection-Quantized can be found + [here](https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + + +## References +* [None](None) +* [Source Model Implementation](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_attrib_net/model.py) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. 
+* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/face_attrib_net_quantized/__init__.py b/qai_hub_models/models/face_attrib_net_quantized/__init__.py new file mode 100644 index 00000000..11ce83d7 --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/__init__.py @@ -0,0 +1,11 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +from qai_hub_models.models._shared.face_attrib_net.app import ( # noqa: F401 + FaceAttribNetApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import FaceAttribNetQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/face_attrib_net_quantized/conftest.py b/qai_hub_models/models/face_attrib_net_quantized/conftest.py new file mode 100644 index 00000000..576c4123 --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/conftest.py @@ -0,0 +1,37 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.face_attrib_net_quantized import Model + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
@pytest.fixture(scope="module", autouse=True)
def cached_from_pretrained():
    """Cache ``Model.from_pretrained`` results for every test in the module.

    While the fixture is active, ``Model.from_pretrained`` is replaced by a
    wrapper that builds the model once per distinct argument set and returns
    the stored instance on later calls. This speeds up tests and limits
    memory growth from repeated model construction.

    Yields:
        The ``pytest.MonkeyPatch`` instance that holds the patch.
    """
    with pytest.MonkeyPatch.context() as mp:
        pretrained_cache = {}
        from_pretrained = Model.from_pretrained
        sig = inspect.signature(from_pretrained)

        def _cached_from_pretrained(*args, **kwargs):
            # The key is the textual form of the call arguments.
            cache_key = str(args) + str(kwargs)
            model = pretrained_cache.get(cache_key)
            # Test for None explicitly: a truthiness check would treat a
            # cached model that evaluates falsy (e.g. one defining __len__)
            # as a miss and rebuild it on every call.
            if model is None:
                model = from_pretrained(*args, **kwargs)
                pretrained_cache[cache_key] = model
            return model

        # Keep the original signature visible to code that introspects
        # from_pretrained.
        _cached_from_pretrained.__signature__ = sig

        mp.setattr(Model, "from_pretrained", _cached_from_pretrained)
        yield mp
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models._shared.whisper.demo import whisper_demo -from qai_hub_models.models.whisper_small_en.model import WhisperSmallEn +from qai_hub_models.models._shared.face_attrib_net.demo import ( + face_attrib_net_demo as demo_main, +) +from qai_hub_models.models.face_attrib_net_quantized.model import ( + FaceAttribNetQuantizable, +) def main(is_test: bool = False): - whisper_demo(WhisperSmallEn, is_test) + demo_main(FaceAttribNetQuantizable, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/face_attrib_net_quantized/export.py b/qai_hub_models/models/face_attrib_net_quantized/export.py new file mode 100644 index 00000000..dcc1b3b8 --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/export.py @@ -0,0 +1,216 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
def export_model(
    device: Optional[str] = None,
    chipset: Optional[str] = None,
    skip_profiling: bool = False,
    skip_inferencing: bool = False,
    skip_downloading: bool = False,
    skip_summary: bool = False,
    output_dir: Optional[str] = None,
    target_runtime: TargetRuntime = TargetRuntime.TFLITE,
    compile_options: str = "",
    profile_options: str = "",
    **additional_model_kwargs,
) -> ExportResult | list[str]:
    """
    Export face_attrib_net_quantized for on-device deployment.

    This function executes the following recipe:

        1. Instantiates a PyTorch model and converts it to a traced TorchScript format
        2. Compiles the model to an asset that can be run on device
        3. Profiles the model performance on a real device
        4. Inferences the model on sample inputs
        5. Downloads the model asset to the local directory
        6. Summarizes the results from profiling and inference

    Each of the last 4 steps can be optionally skipped using the input options.

    Parameters:
        device: Device for which to export the model.
            Full list of available devices can be found by running `hub.get_devices()`.
            At least one of `device` or `chipset` must be provided.
        chipset: If set, will choose a random device with this chipset.
            Overrides the `device` argument.
        skip_profiling: If set, skips profiling of compiled model on real devices.
        skip_inferencing: If set, skips computing on-device outputs from sample data.
        skip_downloading: If set, skips downloading of compiled model.
        skip_summary: If set, skips waiting for and summarizing results
            from profiling and inference.
        output_dir: Directory to store generated assets (e.g. compiled model).
            Defaults to `<cwd>/build/face_attrib_net_quantized`.
        target_runtime: Which on-device runtime to target. Default is TFLite.
        compile_options: Additional options to pass when submitting the compile job.
        profile_options: Additional options to pass when submitting the profile job.
        **additional_model_kwargs: Additional optional kwargs used to customize
            `model_cls.from_pretrained` and `model.get_input_spec`

    Returns:
        A struct of:
            * A CompileJob object containing metadata about the compile job submitted to hub.
            * An InferenceJob containing metadata about the inference job (None if inferencing skipped).
            * A ProfileJob containing metadata about the profile job (None if profiling skipped).

        When `can_access_qualcomm_ai_hub()` is false, no job is submitted and
        the result of `export_without_hub_access(...)` is returned instead.

    Raises:
        ValueError: If neither `device` nor `chipset` is provided.
    """
    model_name = "face_attrib_net_quantized"
    output_path = Path(output_dir or Path.cwd() / "build" / model_name)
    if not device and not chipset:
        raise ValueError("Device or Chipset must be provided.")
    hub_device = hub.Device(
        name=device or "", attributes=f"chipset:{chipset}" if chipset else None
    )
    # Without hub access nothing below can run; delegate to the offline path.
    if not can_access_qualcomm_ai_hub():
        return export_without_hub_access(
            "face_attrib_net_quantized",
            "Facial-Attribute-Detection-Quantized",
            device or f"Device (Chipset {chipset})",
            skip_profiling,
            skip_inferencing,
            skip_downloading,
            skip_summary,
            output_path,
            target_runtime,
            compile_options,
            profile_options,
        )

    # On-device perf improves with I/O in channel_last format for runtimes
    # that execute natively in channel_last format.
    use_channel_last_format = target_runtime.channel_last_native_execution

    # 1. Instantiates a PyTorch model and converts it to a traced TorchScript format
    model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs))
    input_spec = model.get_input_spec(
        **get_input_spec_kwargs(model, additional_model_kwargs)
    )

    # Trace the model
    source_model = model.convert_to_hub_source_model(
        target_runtime, output_path, input_spec
    )

    # 2. Compiles the model to an asset that can be run on device
    model_compile_options = model.get_hub_compile_options(
        target_runtime, compile_options, hub_device
    )
    print(f"Optimizing model {model_name} to run on-device")
    submitted_compile_job = hub.submit_compile_job(
        model=source_model,
        input_specs=input_spec,
        device=hub_device,
        name=model_name,
        calibration_data=model.get_calibration_data(target_runtime),
        options=model_compile_options,
    )
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    # 3. Profiles the model performance on a real device
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(f"Profiling model {model_name} on a hosted device.")
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)

    # 4. Inferences the model on sample inputs
    inference_job: Optional[hub.client.InferenceJob] = None
    if not skip_inferencing:
        # The inference job reuses the profile options; they are recomputed
        # here because step 3 may have been skipped.
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(
            f"Running inference for {model_name} on a hosted device with example inputs."
        )
        sample_inputs = model.sample_inputs(
            input_spec, use_channel_last_format=use_channel_last_format
        )
        submitted_inference_job = hub.submit_inference_job(
            model=compile_job.get_target_model(),
            inputs=sample_inputs,
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        inference_job = cast(hub.client.InferenceJob, submitted_inference_job)

    # 5. Downloads the model asset to the local directory
    if not skip_downloading:
        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / model_name))

    # 6. Summarizes the results from profiling and inference
    if not skip_summary and not skip_profiling:
        # NOTE(review): wait() is called inside the assert, so it is not
        # executed when Python runs with -O.
        assert profile_job is not None and profile_job.wait().success
        profile_data: dict[str, Any] = profile_job.download_profile()  # type: ignore
        print_profile_metrics_from_job(profile_job, profile_data)

    if not skip_summary and not skip_inferencing:
        # Compute local torch outputs to compare against the on-device results.
        sample_inputs = model.sample_inputs(use_channel_last_format=False)
        torch_out = torch_inference(
            model, sample_inputs, return_channel_last_output=use_channel_last_format
        )
        # NOTE(review): same assert-with-side-effect pattern as above.
        assert inference_job is not None and inference_job.wait().success
        inference_result: hub.client.DatasetEntries = inference_job.download_output_data()  # type: ignore

        print_inference_metrics(
            inference_job, inference_result, torch_out, model.get_output_names()
        )

    if not skip_summary:
        print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device)

    return ExportResult(
        compile_job=compile_job,
        inference_job=inference_job,
        profile_job=profile_job,
    )


def main() -> None:
    """Parse the export CLI arguments and run ``export_model`` with them."""
    # All warnings are silenced for the CLI run.
    warnings.filterwarnings("ignore")
    parser = export_parser(model_cls=Model)
    args = parser.parse_args()
    export_model(**vars(args))


if __name__ == "__main__":
    main()
100644 index 00000000..79b3787c --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/info.yaml @@ -0,0 +1,32 @@ +name: Facial-Attribute-Detection-Quantized +# id must match with the model dir name in qai_hub_models +id: face_attrib_net_quantized +status: public +headline: Comprehensive facial analysis by extracting face features. +domain: Computer Vision +description: Facial feature extraction and additional attributes including liveness, eyeclose, mask and glasses detection for face recognition. +use_case: Object Detection +tags: + - real-time + - quantized +license: https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE +deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +source_repo: https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_attrib_net/model.py +technical_details: + Model checkpoint: multitask_FR_state_dict.pt + Input resolution: 128x128 + Input channel number: 1 + Number of parameters: 11.6M + Model size: 47.6MB +applicable_scenarios: + - Incabinet driver monitoring, phone unlocking, and building access control. +related_models: [] +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: true +has_animated_banner: true +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/face_attrib_net_quantized/model.py b/qai_hub_models/models/face_attrib_net_quantized/model.py new file mode 100644 index 00000000..bfbb5682 --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/model.py @@ -0,0 +1,92 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
class FaceAttribNetQuantizable(AIMETQuantizableMixin, FaceAttribNet):
    """AIMET-quantizable variant of FaceAttribNet.

    ``from_pretrained`` builds the quantization simulation with 8-bit
    parameters and 8-bit activations.
    """

    def __init__(self, fanet_model: QuantizationSimModel) -> None:
        """Initialize both base classes from a quantization simulation.

        Parameters:
            fanet_model: Quantization sim. Its ``.model`` is passed to
                ``FaceAttribNet`` and the sim itself to the AIMET mixin.
        """
        FaceAttribNet.__init__(self, fanet_model.model)
        AIMETQuantizableMixin.__init__(self, fanet_model)

    @classmethod
    def from_pretrained(
        cls,
        aimet_encodings: str | None = "DEFAULT",
    ) -> FaceAttribNetQuantizable:
        """Build the quantization sim around a pretrained FaceAttribNet.

        Parameters:
            aimet_encodings:
                "DEFAULT": fetch the default encodings file from the asset
                    store and load it into the sim.
                None (or any falsy value): load no encodings.
                anything else: passed to ``load_encodings_to_sim`` as given.

        Returns:
            A new instance wrapping the configured quantization sim.
        """
        fp_model = FaceAttribNet.from_pretrained()
        input_shape = cls.get_input_spec()["image"][0]

        prepared = prepare_model(fp_model)
        equalize_model(prepared, input_shape)
        quant_sim = QuantizationSimModel(
            prepared,
            quant_scheme="tf_enhanced",
            default_param_bw=8,
            default_output_bw=8,
            config_file=get_default_aimet_config(),
            dummy_input=torch.rand(input_shape),
        )
        tie_observers(quant_sim)
        constrain_quantized_inputs_to_image_range(quant_sim)

        if aimet_encodings:
            # Resolve the "DEFAULT" sentinel to the cached asset, otherwise
            # use the caller's value untouched.
            encodings_source = (
                CachedWebModelAsset.from_asset_store(
                    MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS
                ).fetch()
                if aimet_encodings == "DEFAULT"
                else aimet_encodings
            )
            load_encodings_to_sim(quant_sim, encodings_source)

        return cls(quant_sim)
Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy +models: +- name: Facial-Attribute-Detection-Quantized + universal_assets: + torchscript_onnx_tflite: mn1wzrdvm + torchscript_onnx: mqp3z79vm + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 498.0 + throughput: 2008.0321285140562 + estimated_peak_memory_range: + min: 20480 + max: 9782144 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: j5welj3j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 515.0 + throughput: 1941.7475728155339 + estimated_peak_memory_range: + min: 12288 + max: 20116896 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jp0zmnle5 + job_status: Passed + torchscript_onnx: + inference_time: 912.0 + throughput: 1096.4912280701753 + estimated_peak_memory_range: + min: 12288 + max: 16427544 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 296 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 296 + job_id: j5weljqj5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-12T00:01:44Z' + - torchscript_onnx_tflite: + inference_time: 376.0 + throughput: 2659.574468085106 + estimated_peak_memory_range: + min: 16384 + max: 30218928 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jg9lz6yvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 380.0 + throughput: 2631.5789473684213 + estimated_peak_memory_range: + min: 12288 + max: 27493504 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 158 + job_id: jp8qelz8p + job_status: Passed + torchscript_onnx: + inference_time: 680.0 + throughput: 1470.5882352941176 + estimated_peak_memory_range: + min: 0 + max: 106702848 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 296 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 296 + job_id: jg9lz6wvg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-12T00:01:45Z' + - torchscript_onnx_tflite: + inference_time: 380.0 + throughput: 2631.5789473684213 + estimated_peak_memory_range: + min: 12288 + max: 19583696 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jp14nrwlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 402.0 + throughput: 2487.5621890547263 + estimated_peak_memory_range: + min: 8192 + max: 21456656 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jgke2j3og + job_status: Passed + torchscript_onnx: + inference_time: 659.0 + throughput: 1517.4506828528072 + estimated_peak_memory_range: + min: 0 + max: 54069344 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 296 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 296 + job_id: jp14nrelp + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-12T00:01:46Z' + - torchscript_onnx_tflite: + inference_time: 2656.0 + throughput: 376.50602409638554 + estimated_peak_memory_range: + min: 12288 + max: 33073840 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 169 + layers_on_gpu: 1 + layers_on_cpu: 0 + 
total_layers: 170 + job_id: jgdxdjqlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1775.0 + throughput: 563.3802816901408 + estimated_peak_memory_range: + min: 12288 + max: 12251488 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: j5q6lj3mp + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS6490 Proxy + timestamp: '2024-12-12T00:01:34Z' + - torchscript_onnx_tflite: + inference_time: 54450.0 + throughput: 18.365472910927455 + estimated_peak_memory_range: + min: 2117632 + max: 5375096 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 46 + layers_on_gpu: 0 + layers_on_cpu: 124 + total_layers: 170 + job_id: j57yeqxr5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8250 Proxy + timestamp: '2024-12-12T00:01:23Z' + - torchscript_onnx_tflite: + inference_time: 504.0 + throughput: 1984.126984126984 + estimated_peak_memory_range: + min: 20480 + max: 9946648 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jp4lyzvl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 480.0 + throughput: 2083.3333333333335 + estimated_peak_memory_range: + min: 86016 + max: 1309928 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jglvyj3l5 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-12T00:01:35Z' + - torchscript_onnx_tflite: + inference_time: 5066.0 + throughput: 197.39439399921042 + 
estimated_peak_memory_range: + min: 16384 + max: 22369248 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jpxklwy95 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5240.0 + throughput: 190.83969465648855 + estimated_peak_memory_range: + min: 24576 + max: 10454416 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jp3jzyezg + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-12T00:01:38Z' + - torchscript_onnx_tflite: + inference_time: 502.0 + throughput: 1992.03187250996 + estimated_peak_memory_range: + min: 16384 + max: 9399928 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: j5mn0j3qp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 488.0 + throughput: 2049.1803278688526 + estimated_peak_memory_range: + min: 86016 + max: 1635648 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jgo2lj3dp + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-12T00:01:39Z' + - torchscript_onnx_tflite: + inference_time: 1022.0 + throughput: 978.4735812133073 + estimated_peak_memory_range: + min: 16384 + max: 19595440 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jgn6zj3m5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1070.0 + throughput: 934.5794392523364 + estimated_peak_memory_range: 
+ min: 12288 + max: 6029264 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jpv6ljvm5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-12T00:01:40Z' + - torchscript_onnx_tflite: + inference_time: 504.0 + throughput: 1984.126984126984 + estimated_peak_memory_range: + min: 20480 + max: 10018472 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jprvlzeeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 484.0 + throughput: 2066.115702479339 + estimated_peak_memory_range: + min: 24576 + max: 1316064 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jgjvrje8g + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-12T00:01:41Z' + - torchscript_onnx_tflite: + inference_time: 919.0 + throughput: 1088.139281828074 + estimated_peak_memory_range: + min: 16384 + max: 24332512 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jp2kr2lmp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1021.0 + throughput: 979.4319294809011 + estimated_peak_memory_range: + min: 12288 + max: 6135408 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jped7jk05 + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + 
timestamp: '2024-12-12T00:01:42Z' + - torchscript_onnx_tflite: + inference_time: 641.0 + throughput: 1560.0624024960998 + estimated_peak_memory_range: + min: 16384 + max: 33610224 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 170 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 170 + job_id: jpy1o964p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 637.0 + throughput: 1569.8587127158555 + estimated_peak_memory_range: + min: 16384 + max: 27155952 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: jgz3l1r65 + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-12T00:01:43Z' + - torchscript_onnx_qnn: + inference_time: 628.0 + throughput: 1592.3566878980891 + estimated_peak_memory_range: + min: 618496 + max: 618496 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 158 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 158 + job_id: j56y8kn7p + job_status: Passed + torchscript_onnx: + inference_time: 1026.0 + throughput: 974.6588693957115 + estimated_peak_memory_range: + min: 15904768 + max: 15904768 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 296 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 296 + job_id: jgdxdjolp + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-12T00:01:47Z' diff --git a/qai_hub_models/models/face_attrib_net_quantized/test.py b/qai_hub_models/models/face_attrib_net_quantized/test.py new file mode 100644 index 00000000..494cd11d --- /dev/null +++ b/qai_hub_models/models/face_attrib_net_quantized/test.py @@ -0,0 +1,58 @@ +# 
--------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import numpy as np +from scipy.special import softmax + +from qai_hub_models.models._shared.face_attrib_net.app import FaceAttribNetApp +from qai_hub_models.models._shared.face_attrib_net.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models._shared.face_attrib_net.demo import ( + face_attrib_net_demo as demo_main, +) +from qai_hub_models.models.face_attrib_net_quantized.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, + OUT_NAMES, + FaceAttribNetQuantizable, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_json, +) +from qai_hub_models.utils.testing import assert_most_same + +OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "output_sample.json" +) + + +# Verify that the output from Torch is as expected. 
def test_task():
    """Compare the quantized model's outputs on the sample image with the stored oracle."""
    app = FaceAttribNetApp(FaceAttribNetQuantizable.from_pretrained())
    predictions = app.run_inference_on_image(load_image(INPUT_IMAGE_ADDRESS))
    expected = load_json(OUTPUT_IMAGE_ADDRESS)

    # Outputs 0 and 1 are not compared, so the loop starts at index 2.
    for idx in range(2, len(predictions)):
        if idx == 5:
            # Output 5 is thresholded directly, without softmax.
            predicted_flags = predictions[idx] > 0.5
            expected_flags = np.array(expected[OUT_NAMES[idx]]) > 0.5
        else:
            predicted_flags = softmax(predictions[idx]) > 0.5
            expected_flags = softmax(expected[OUT_NAMES[idx]]) > 0.5
        assert_most_same(predicted_flags, expected_flags, diff_tol=0.001)

    print("Unit test is done")


def test_demo():
    """Run the shared demo once with the quantized model class."""
    demo_main(FaceAttribNetQuantizable)
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import json -from pathlib import Path - -from qai_hub_models.models.face_det_lite.app import FaceDetLiteApp -from qai_hub_models.models.face_det_lite.model import ( - MODEL_ASSET_VERSION, +from qai_hub_models.models._shared.face_detection.demo import main as face_det_lite_demo +from qai_hub_models.models._shared.face_detection.model import ( MODEL_ID, FaceDetLite_model, ) -from qai_hub_models.utils.args import ( - demo_model_from_cli_args, - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image - -INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "test_640x480_Rooney.jpg" -) - - -# Run face_det_lite model end-to-end on a sample image. -# The demo will output the face bounding boxes in json files -# the bounding box represented by left, top, width, and height. 
-def main(): - # Demo parameters - parser = get_model_cli_parser(FaceDetLite_model) - parser = get_on_device_demo_parser(parser, add_output_dir=True) - parser.add_argument( - "--image", - type=str, - default=INPUT_IMAGE_ADDRESS, - help="image file path or URL", - ) - # args = parser.parse_args([] if is_test else None) - args = parser.parse_args([]) - model = demo_model_from_cli_args(FaceDetLite_model, MODEL_ID, args) - validate_on_device_demo_args(args, MODEL_ID) - - # Load image - (_, _, height, width) = FaceDetLite_model.get_input_spec()["input"][0] - orig_image = load_image(args.image) - print("Model Loaded") - - app = FaceDetLiteApp(model) - res = app.run_inference_on_image(orig_image) - out_dict = {} - - out_dict["bounding obx"] = str(res) - output_path = ( - args.output_dir or str(Path() / "build") - ) + "/FaceDetLitebNet_output.json" - with open(output_path, "w", encoding="utf-8") as wf: - json.dump(out_dict, wf, ensure_ascii=False, indent=4) - print(f"Model outputs are saved at: {output_path}") +def main(is_test: bool = False): + face_det_lite_demo(FaceDetLite_model, MODEL_ID, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/face_det_lite/model.py b/qai_hub_models/models/face_det_lite/model.py index a8273e81..4b6f2ae2 100644 --- a/qai_hub_models/models/face_det_lite/model.py +++ b/qai_hub_models/models/face_det_lite/model.py @@ -4,256 +4,14 @@ # --------------------------------------------------------------------- from __future__ import annotations -import math - -import torch -import torch.nn as nn - -from qai_hub_models.models._shared.face_detection.layers import ( - Block3x3, - CBAModule, - DetectModule, - HeadModule, - SeModule, - UpModule, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.face_detection.model import FaceDetLite_model MODEL_ID = 
"face_det_lite" -MODEL_ASSET_VERSION = "1" +MODEL_ASSET_VERSION = "2" DEFAULT_WEIGHTS = "qfd360_sl_model.pt" -class FaceDetLite_model(BaseModel): - """ - qualcomm face detector model. - Detect bounding box for face, - Detect landmarks: face landmarks. - The output will be saved as 3 maps which will be decoded to final result in the FaceDetLite_App. - """ - - def __init__(self, model: nn.Module) -> None: - super().__init__() - self.model = model - +class FaceDetLite_model(FaceDetLite_model): @classmethod - def from_pretrained(cls, checkpoint_path: str | None = None): - """Load FaceDetLite from a weightfile created by the source FaceDetLite repository.""" - - checkpoint_path = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_WEIGHTS - ) - FaceDetLite_model = FaceDetLite() - FaceDetLite_model.load_state_dict(load_torch(checkpoint_path)["model_state"]) - FaceDetLite_model.to(torch.device("cpu")) - - return cls(FaceDetLite_model) - - def forward(self, image: torch.Tensor): - """ - Run FaceDetLite on `image`, and produce a the list of face bounding box - - Parameters: - image: Pixel values pre-processed for encoder consumption. - Range: float[0, 1] - 1-channel gray scale image - - Returns: - heatmap: N,C,H,W the heatmap for the person/face detection. - bbox: N,C*4, H,W the bounding box coordinate as a map. - landmark: N,C*10,H,W the coordinates of landmarks as a map. - """ - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - height: int = 480, - width: int = 640, - ) -> InputSpec: - """ - Returns the input specification (name -> (shape, type). This can be - used to submit profiling job on Qualcomm AI Hub. 
- """ - return {"input": ((batch_size, 1, height, width), "float32")} - - @staticmethod - def get_output_names() -> list[str]: - return ["heatmap", "bbox", "landmark"] - - @staticmethod - def get_channel_last_inputs() -> list[str]: - return ["input"] - - @staticmethod - def get_channel_last_outputs() -> list[str]: - return ["heatmap", "bbox", "landmark"] - - -class FaceDetLite(nn.Module): - def __init__( - self, - wide: int = 32, - has_ext: bool = False, - upmode: str = "UCBA", - act: str = "relu", - RGB: bool = False, - has_se: bool = True, - phase: str = "train", - ): - super().__init__() - """ - FaceDetLite face detector model for face and landmark detection. - output face bounding box and 5 landmarks. - - Parameters: - wide: the channel size of bandwith of the intermediate layers - has_ext: if add extension layer in the head module. - upmode: upsampling mode. - act: activation function. - RGB: if the input is a 3 channel RGB - has_se: if has the se module - phase: "train" or "test" - - Returns: - FaceDetLite model instance. 
- """ - self.use_rgb = RGB - self.has_landmark = True - # define backbone - self.bb = Mbv3SmallFast(act, RGB, has_se) - - c1, c2 = self.bb.uplayer_shape - act = "relu" - self.conv3 = CBAModule( - self.bb.output_channels, - wide, - kernel_size=1, - stride=1, - padding=0, - bias=False, - act=act, - ) # s32 - self.connect1 = CBAModule(c1, wide, kernel_size=1, act=act) # s8 - self.connect2 = CBAModule(c2, wide, kernel_size=1, act=act) # s16 - - self.up0 = UpModule( - wide, wide, kernel_size=2, stride=2, mode=upmode, act=act - ) # s16 - self.up1 = UpModule( - wide, wide, kernel_size=2, stride=2, mode=upmode, act=act - ) # s8 - self.detect = DetectModule(wide, act=act) - - self.center = HeadModule(wide, 1, act=act) - self.box = HeadModule(wide, 4, act=act) - - if self.has_landmark: - self.landmark = HeadModule(wide, 10, act=act) - self.phase = phase - - self.bridge = nn.Conv2d( - wide * 2, wide, kernel_size=1, stride=1, padding=0, bias=False - ) - - def forward(self, input): - """ - input: N,C,H,W (1,1,480,640) tensor of input image - return: 3 tensors including - heatmap: N,C,H,W (1,1,60,80) - bbox: N,C,H,W (1,4,120,80) - landmark: N,C,H,W (1,10,60,80) - """ - - s8_, s16_, s32_ = self.bb(input) - s32 = self.conv3(s32_) - - s16 = self.up0(s32) + self.connect2(s16_) - s8 = self.up1(s16) + self.connect1(s8_) - x = self.detect(s8) # s4: B,C,200,200 - - center = self.center(x) - box = self.box(x) - - if self.has_landmark: - landmark = self.landmark(x) - if self.phase == "test": - return center.sigmoid(), box, landmark - - return center, box, landmark - - -class Mbv3SmallFast(nn.Module): - def __init__(self, act="relu", RGB=True, has_se=True): - super().__init__() - - self.keep = [2, 7] - self.uplayer_shape = [32, 64] - self.output_channels = 96 - - if RGB: - self.conv1 = nn.Conv2d( - 3, 16, kernel_size=3, stride=2, padding=1, bias=False - ) - else: # for gray - self.conv1 = nn.Conv2d( - 1, 16, kernel_size=3, stride=2, padding=1, bias=False - ) - - self.bn1 = 
nn.BatchNorm2d(16) - if act == "relu": - self.hs1 = nn.ReLU(inplace=True) - if has_se: - self.bneck = nn.Sequential( - Block3x3(3, 16, 16, 16, self.hs1, None, 2), # 0 * - Block3x3(3, 16, 64, 32, self.hs1, None, 2), # 1 - Block3x3(3, 32, 96, 32, self.hs1, None, 1), # 2 * - Block3x3(5, 32, 96, 32, self.hs1, SeModule(32), 2), # 3 - Block3x3(5, 32, 224, 32, self.hs1, SeModule(32), 1), # 4 - Block3x3(5, 32, 224, 32, self.hs1, SeModule(32), 1), # 5 - Block3x3(5, 32, 128, 64, self.hs1, SeModule(64), 1), # 6 - Block3x3(5, 64, 160, 64, self.hs1, SeModule(64), 1), # 7 * - Block3x3(5, 64, 256, 96, self.hs1, SeModule(96), 2), # 8 - ) - - else: - self.bneck = nn.Sequential( - Block3x3(3, 16, 16, 16, self.hs1, None, 2), # 0 * - Block3x3(3, 16, 72, 24, self.hs1, None, 2), # 1 - Block3x3(3, 24, 88, 24, self.hs1, None, 1), # 2 * - Block3x3(5, 24, 96, 40, self.hs1, None, 2), # 3 - Block3x3(5, 40, 240, 40, self.hs1, None, 1), # 4 - Block3x3(5, 40, 240, 40, self.hs1, None, 1), # 5 - Block3x3(5, 40, 120, 48, self.hs1, None, 1), # 6 - Block3x3(5, 48, 144, 48, self.hs1, None, 1), # 7 * - Block3x3(5, 48, 288, 96, self.hs1, None, 2), # 8 - ) - - def initialize_weights(self): - print("random init...") - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2.0 / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - n = m.weight.size(1) - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() - - def forward(self, x): - x = self.hs1(self.bn1(self.conv1(x))) - outs = [] - for index, item in enumerate(self.bneck): - x = item(x) - - if index in self.keep: - outs.append(x) - outs.append(x) - return outs + def from_pretrained(cls) -> FaceDetLite_model: # type: ignore + return super().from_pretrained() diff --git a/qai_hub_models/models/face_det_lite/perf.yaml 
b/qai_hub_models/models/face_det_lite/perf.yaml index ca31d366..7768c6e6 100644 --- a/qai_hub_models/models/face_det_lite/perf.yaml +++ b/qai_hub_models/models/face_det_lite/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Lightweight-Face-Detection universal_assets: - torchscript_onnx_tflite: mm6v4v46q - torchscript_onnx: mn41d16zn + torchscript_onnx_tflite: mngg1800n + torchscript_onnx: mn4l1p3vq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1033.0 - throughput: 968.054211035818 + inference_time: 1029.0 + throughput: 971.8172983479105 estimated_peak_memory_range: - min: 12288 - max: 3882472 + min: 16384 + max: 7523576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j57ykxqr5 + job_id: jp0zmn425 job_status: Passed torchscript_onnx_qnn: - inference_time: 1106.0 - throughput: 904.1591320072333 + inference_time: 1100.0 + throughput: 909.0909090909091 estimated_peak_memory_range: - min: 1249280 - max: 28109808 + min: 2310144 + max: 6063752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgkeo3jog + job_id: jped7jo75 job_status: Passed torchscript_onnx: - inference_time: 2269.0 - throughput: 440.72278536800354 + inference_time: 2244.0 + throughput: 445.63279857397504 estimated_peak_memory_range: min: 1245184 - max: 3558168 + max: 94245624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp147erlp + job_id: jprvlz8eg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:32:09Z' + timestamp: '2024-12-11T23:59:52Z' - torchscript_onnx_tflite: inference_time: 682.0 throughput: 1466.275659824047 estimated_peak_memory_range: min: 
12288 - max: 15620672 + max: 13747008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp4lmvzl5 + job_id: jp8qel2zp job_status: Passed torchscript_onnx_qnn: - inference_time: 733.0 - throughput: 1364.256480218281 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 0 - max: 13439024 + min: 1245184 + max: 16800592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j5q6z3jmp + job_id: j5weljwz5 job_status: Passed torchscript_onnx: - inference_time: 1375.0 - throughput: 727.2727272727273 + inference_time: 1383.0 + throughput: 723.0657989877079 estimated_peak_memory_range: min: 0 - max: 44225264 + max: 44018592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdx8ojlp + job_id: jp2kr20mp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:32:10Z' + timestamp: '2024-12-11T23:59:53Z' - torchscript_onnx_tflite: - inference_time: 753.0 - throughput: 1328.0212483399735 + inference_time: 715.0 + throughput: 1398.6013986013986 estimated_peak_memory_range: - min: 8192 - max: 9824400 + min: 0 + max: 10049840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jpxk3yw95 + job_id: jgke2jvyg job_status: Passed torchscript_onnx_qnn: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 813.0 + throughput: 1230.0123001230013 estimated_peak_memory_range: - min: 1241088 - max: 12146144 + min: 0 + max: 10886864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 122 - job_id: jglvo3jl5 + job_id: jg9lz60qg job_status: Passed torchscript_onnx: - inference_time: 1216.0 - throughput: 822.3684210526316 + inference_time: 1215.0 + throughput: 823.0452674897119 estimated_peak_memory_range: min: 0 - max: 26550880 + max: 27077552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we8qo65 + job_id: jpy1o9r4p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:32:11Z' + timestamp: '2024-12-11T23:59:54Z' - torchscript_onnx_tflite: - inference_time: 1025.0 - throughput: 975.609756097561 + inference_time: 1027.0 + throughput: 973.7098344693281 estimated_peak_memory_range: - min: 20480 - max: 3683656 + min: 12288 + max: 3307656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j5mno3jqp + job_id: j5q6lj07p job_status: Passed torchscript_onnx_qnn: - inference_time: 1092.0 - throughput: 915.7509157509157 + inference_time: 1076.0 + throughput: 929.368029739777 estimated_peak_memory_range: - min: 53248 - max: 1674792 + min: 1257472 + max: 2385056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j56yrnk7p + job_id: jp14nr2kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:32:00Z' + timestamp: '2024-12-11T23:59:43Z' - torchscript_onnx_tflite: - inference_time: 22468.0 - throughput: 44.50774434751647 + inference_time: 22496.0 + throughput: 44.45234708392603 estimated_peak_memory_range: - min: 319488 - max: 11273088 + min: 356352 + max: 15314528 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgn6o3jm5 + job_id: jglvyj4e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 22643.0 - throughput: 44.16375921918474 + inference_time: 22594.0 + throughput: 44.25953793042401 estimated_peak_memory_range: - min: 1339392 - max: 7022160 + min: 1245184 + max: 11880256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgo2o3jdp + job_id: j5weljwj5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:32:02Z' + timestamp: '2024-12-11T23:59:45Z' - torchscript_onnx_tflite: - inference_time: 1031.0 - throughput: 969.9321047526673 + inference_time: 1032.0 + throughput: 968.9922480620155 estimated_peak_memory_range: - min: 16384 - max: 4954184 + min: 24576 + max: 4725560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jprvoezeg + job_id: j56y8k2vp job_status: Passed torchscript_onnx_qnn: inference_time: 1085.0 throughput: 921.6589861751152 estimated_peak_memory_range: - min: 1257472 - max: 2459160 + min: 1282048 + max: 2722784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jpv6evjm5 + job_id: jgdxdjnlp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:32:03Z' + timestamp: '2024-12-11T23:59:46Z' - torchscript_onnx_tflite: - inference_time: 1744.0 - throughput: 573.394495412844 + inference_time: 1764.0 + throughput: 566.8934240362812 estimated_peak_memory_range: - min: 12288 - max: 8950032 + 
min: 16384 + max: 9118672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp2k4l2mp + job_id: jp3jzynxg job_status: Passed torchscript_onnx_qnn: - inference_time: 2153.0 - throughput: 464.4681839294008 + inference_time: 1889.0 + throughput: 529.3806246691371 estimated_peak_memory_range: min: 0 - max: 6007552 + max: 5891600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgjvoej8g + job_id: jp4lyzkl5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:32:04Z' + timestamp: '2024-12-11T23:59:47Z' - torchscript_onnx_tflite: inference_time: 1034.0 throughput: 967.1179883945841 estimated_peak_memory_range: - min: 24576 - max: 4923728 + min: 16384 + max: 6974816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jpy1q694p + job_id: jgo2ljz4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1089.0 - throughput: 918.2736455463728 + inference_time: 1078.0 + throughput: 927.643784786642 estimated_peak_memory_range: - min: 1273856 - max: 2495608 + min: 16384 + max: 1331968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jped8kj05 + job_id: jpxklwn95 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:32:05Z' + timestamp: '2024-12-11T23:59:49Z' - torchscript_onnx_tflite: - inference_time: 1925.0 - throughput: 519.4805194805194 + inference_time: 1917.0 + throughput: 521.6484089723526 estimated_peak_memory_range: - min: 16384 - max: 8906544 + 
min: 0 + max: 14543824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp0zdlne5 + job_id: jpv6ljq75 job_status: Passed torchscript_onnx_qnn: - inference_time: 2140.0 - throughput: 467.2897196261682 + inference_time: 2187.0 + throughput: 457.2473708276177 estimated_peak_memory_range: min: 1245184 - max: 6975056 + max: 7113664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jgz38r165 + job_id: j5mn0jqqp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:32:06Z' + timestamp: '2024-12-11T23:59:50Z' - torchscript_onnx_tflite: - inference_time: 1263.0 - throughput: 791.7656373713381 + inference_time: 1264.0 + throughput: 791.1392405063291 estimated_peak_memory_range: min: 16384 - max: 14086752 + max: 15271472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp8q6zl8p + job_id: jgjvrjd7g job_status: Passed torchscript_onnx_qnn: - inference_time: 1345.0 - throughput: 743.4944237918215 + inference_time: 1349.0 + throughput: 741.2898443291327 estimated_peak_memory_range: - min: 1273856 - max: 17229152 + min: 1245184 + max: 16877472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jg9lkw6vg + job_id: jgn6zjlm5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:32:08Z' + timestamp: '2024-12-11T23:59:51Z' - torchscript_onnx_qnn: - inference_time: 1236.0 - throughput: 809.0614886731391 + inference_time: 1226.0 + throughput: 815.6606851549756 
estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jp3jxeyzg + job_id: jgdxdjnkp job_status: Passed torchscript_onnx: - inference_time: 2040.0 - throughput: 490.19607843137254 + inference_time: 2066.0 + throughput: 484.027105517909 estimated_peak_memory_range: - min: 1437696 - max: 1437696 + min: 3104768 + max: 3104768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lkwvlg + job_id: jp0zmn3e5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:32:12Z' + timestamp: '2024-12-11T23:59:55Z' diff --git a/qai_hub_models/models/face_det_lite/test.py b/qai_hub_models/models/face_det_lite/test.py index dd0edb4d..546f5b3c 100644 --- a/qai_hub_models/models/face_det_lite/test.py +++ b/qai_hub_models/models/face_det_lite/test.py @@ -2,8 +2,8 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from qai_hub_models.models._shared.face_detection.demo import INPUT_IMAGE_ADDRESS from qai_hub_models.models.face_det_lite.app import FaceDetLiteApp -from qai_hub_models.models.face_det_lite.demo import INPUT_IMAGE_ADDRESS from qai_hub_models.models.face_det_lite.demo import main as demo_main from qai_hub_models.models.face_det_lite.model import ( MODEL_ASSET_VERSION, @@ -30,7 +30,7 @@ def test_task(): output_tensor_oracle = load_json(OUTPUT_IMAGE_ADDRESS) assert_most_same( - str(output_tensor), output_tensor_oracle["bounding obx"], diff_tol=0.01 + str(output_tensor), output_tensor_oracle["bounding box"], diff_tol=0.01 ) diff --git a/qai_hub_models/models/face_det_lite_quantized/README.md b/qai_hub_models/models/face_det_lite_quantized/README.md new file mode 100644 index 00000000..0e24f9ff --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/README.md @@ -0,0 +1,58 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [Lightweight-Face-Detection-Quantized: face_det_lite_quantized is a face detection model](https://aihub.qualcomm.com/models/face_det_lite_quantized) + +face_det_lite_quantized is a machine learning model that detects faces in images. + +This is based on the implementation of Lightweight-Face-Detection-Quantized found [here](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_det_lite/model.py). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +across various devices can be found [here](https://aihub.qualcomm.com/models/face_det_lite_quantized). + +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device.
+ + + + +## Example & Usage + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.face_det_lite_quantized.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.face_det_lite_quantized.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Qualcomm® AI Hub. + + +## License +* The license for the original implementation of Lightweight-Face-Detection-Quantized can be found + [here](https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) + + +## References +* [None](None) +* [Source Model Implementation](https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_det_lite/model.py) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com).
+ + diff --git a/qai_hub_models/models/face_det_lite_quantized/__init__.py b/qai_hub_models/models/face_det_lite_quantized/__init__.py new file mode 100755 index 00000000..86343c69 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/__init__.py @@ -0,0 +1,8 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models.face_det_lite.app import FaceDetLiteApp as App # noqa: F401 + +from .model import MODEL_ID # noqa: F401 +from .model import FaceDetLiteQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/face_det_lite_quantized/conftest.py b/qai_hub_models/models/face_det_lite_quantized/conftest.py new file mode 100644 index 00000000..c9594b03 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.face_det_lite_quantized import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
+@pytest.fixture(scope="module", autouse=True) +def cached_from_pretrained(): + with pytest.MonkeyPatch.context() as mp: + pretrained_cache = {} + from_pretrained = Model.from_pretrained + sig = inspect.signature(from_pretrained) + + @skip_clone_repo_check + def _cached_from_pretrained(*args, **kwargs): + cache_key = str(args) + str(kwargs) + model = pretrained_cache.get(cache_key, None) + if model: + return model + else: + model = from_pretrained(*args, **kwargs) + pretrained_cache[cache_key] = model + return model + + _cached_from_pretrained.__signature__ = sig + + mp.setattr(Model, "from_pretrained", _cached_from_pretrained) + yield mp diff --git a/qai_hub_models/models/face_det_lite_quantized/demo.py b/qai_hub_models/models/face_det_lite_quantized/demo.py new file mode 100755 index 00000000..bc10a509 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/demo.py @@ -0,0 +1,17 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.face_detection.demo import main as face_det_lite_demo +from qai_hub_models.models.face_det_lite_quantized.model import ( + MODEL_ID, + FaceDetLiteQuantizable, +) + + +def main(is_test: bool = False): + face_det_lite_demo(FaceDetLiteQuantizable, MODEL_ID, is_test) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/face_det_lite_quantized/export.py b/qai_hub_models/models/face_det_lite_quantized/export.py new file mode 100644 index 00000000..07505a89 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/export.py @@ -0,0 +1,216 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
def export_model(
    device: Optional[str] = None,
    chipset: Optional[str] = None,
    skip_profiling: bool = False,
    skip_inferencing: bool = False,
    skip_downloading: bool = False,
    skip_summary: bool = False,
    output_dir: Optional[str] = None,
    target_runtime: TargetRuntime = TargetRuntime.TFLITE,
    compile_options: str = "",
    profile_options: str = "",
    **additional_model_kwargs,
) -> ExportResult | list[str]:
    """
    Export face_det_lite_quantized through Qualcomm AI Hub.

    This function executes the following recipe:

        1. Instantiates a PyTorch model and converts it to a traced TorchScript format
        2. Compiles the model to an asset that can be run on device
        3. Profiles the model performance on a real device
        4. Inferences the model on sample inputs
        5. Downloads the model asset to the local directory
        6. Summarizes the results from profiling and inference

    Each of the last 4 steps can be optionally skipped using the input options.

    Parameters:
        device: Device for which to export the model.
            Full list of available devices can be found by running `hub.get_devices()`.
            There is no default: either `device` or `chipset` must be given.
        chipset: Forwarded to `hub.Device` as the attribute `chipset:<chipset>`.
            NOTE(review): the generated text described this as "choose a random
            device with this chipset; overrides `device`". This function passes
            both values to `hub.Device`, so any such precedence is decided by
            qai_hub -- confirm there.
        skip_profiling: If set, skips profiling of compiled model on real devices.
        skip_inferencing: If set, skips computing on-device outputs from sample data.
        skip_downloading: If set, skips downloading of compiled model.
        skip_summary: If set, skips waiting for and summarizing results
            from profiling and inference.
        output_dir: Directory to store generated assets (e.g. compiled model).
            Defaults to `<current working directory>/build/face_det_lite_quantized`.
        target_runtime: Which on-device runtime to target. Default is TFLite.
        compile_options: Additional options to pass when submitting the compile job.
        profile_options: Additional options to pass when submitting the profile job.
            The same resolved options are also used for the inference job.
        **additional_model_kwargs: Additional optional kwargs used to customize
            `model_cls.from_pretrained` and `model.get_input_spec`

    Returns:
        When AI Hub is accessible, an `ExportResult` struct of:
            * A CompileJob object containing metadata about the compile job submitted to hub.
            * An InferenceJob containing metadata about the inference job (None if inferencing skipped).
            * A ProfileJob containing metadata about the profile job (None if profiling skipped).
        When `can_access_qualcomm_ai_hub()` is false, whatever
        `export_without_hub_access` returns (annotated here as `list[str]`).

    Raises:
        ValueError: If neither `device` nor `chipset` is provided.
        AssertionError: If the profile or inference job did not succeed while
            the summary step is enabled.
    """
    model_name = "face_det_lite_quantized"
    output_path = Path(output_dir or Path.cwd() / "build" / model_name)
    if not device and not chipset:
        raise ValueError("Device or Chipset must be provided.")
    hub_device = hub.Device(
        name=device or "", attributes=f"chipset:{chipset}" if chipset else None
    )
    # Without hub access no job can be submitted; delegate to the offline
    # helper and return its result instead of an ExportResult.
    if not can_access_qualcomm_ai_hub():
        return export_without_hub_access(
            "face_det_lite_quantized",
            "Lightweight-Face-Detection-Quantized",
            device or f"Device (Chipset {chipset})",
            skip_profiling,
            skip_inferencing,
            skip_downloading,
            skip_summary,
            output_path,
            target_runtime,
            compile_options,
            profile_options,
        )

    # On-device perf improves with I/O in channel_last format for runtimes
    # that execute natively in channel_last format.
    use_channel_last_format = target_runtime.channel_last_native_execution

    # 1. Instantiates a PyTorch model and converts it to a traced TorchScript format
    # additional_model_kwargs is filtered twice: once for from_pretrained and
    # once for get_input_spec.
    model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs))
    input_spec = model.get_input_spec(
        **get_input_spec_kwargs(model, additional_model_kwargs)
    )

    # Trace the model
    source_model = model.convert_to_hub_source_model(
        target_runtime, output_path, input_spec
    )

    # 2. Compiles the model to an asset that can be run on device
    model_compile_options = model.get_hub_compile_options(
        target_runtime, compile_options, hub_device
    )
    print(f"Optimizing model {model_name} to run on-device")
    submitted_compile_job = hub.submit_compile_job(
        model=source_model,
        input_specs=input_spec,
        device=hub_device,
        name=model_name,
        calibration_data=model.get_calibration_data(target_runtime),
        options=model_compile_options,
    )
    # cast() only narrows the static type; nothing is checked at runtime.
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    # 3. Profiles the model performance on a real device
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(f"Profiling model {model_name} on a hosted device.")
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)

    # 4. Inferences the model on sample inputs
    inference_job: Optional[hub.client.InferenceJob] = None
    if not skip_inferencing:
        # Recomputed here (not reused from step 3) because profiling may have
        # been skipped; the inference job runs with the profile options.
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(
            f"Running inference for {model_name} on a hosted device with example inputs."
        )
        sample_inputs = model.sample_inputs(
            input_spec, use_channel_last_format=use_channel_last_format
        )
        submitted_inference_job = hub.submit_inference_job(
            model=compile_job.get_target_model(),
            inputs=sample_inputs,
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        inference_job = cast(hub.client.InferenceJob, submitted_inference_job)

    # 5. Downloads the model asset to the local directory
    if not skip_downloading:
        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / model_name))

    # 6. Summarizes the results from profiling and inference
    if not skip_summary and not skip_profiling:
        # NOTE(review): the wait() call lives inside the assert, so running
        # Python with -O removes both the check and the wait.
        assert profile_job is not None and profile_job.wait().success
        profile_data: dict[str, Any] = profile_job.download_profile()  # type: ignore
        print_profile_metrics_from_job(profile_job, profile_data)

    if not skip_summary and not skip_inferencing:
        # The local torch reference uses freshly generated inputs with
        # use_channel_last_format=False; only its output layout is switched
        # via return_channel_last_output.
        sample_inputs = model.sample_inputs(use_channel_last_format=False)
        torch_out = torch_inference(
            model, sample_inputs, return_channel_last_output=use_channel_last_format
        )
        # Same -O caveat as the profile assertion above.
        assert inference_job is not None and inference_job.wait().success
        inference_result: hub.client.DatasetEntries = inference_job.download_output_data()  # type: ignore

        print_inference_metrics(
            inference_job, inference_result, torch_out, model.get_output_names()
        )

    if not skip_summary:
        print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device)

    return ExportResult(
        compile_job=compile_job,
        inference_job=inference_job,
        profile_job=profile_job,
    )
index 00000000..c0ce0833 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/info.yaml @@ -0,0 +1,32 @@ +name: Lightweight-Face-Detection-Quantized +# id must match with the model dir name in qai_hub_models +id: face_det_lite_quantized +status: public +headline: face_det_lite_quantized is a face detection model. +domain: Computer Vision +description: face_det_lite_quantized is a machine learning model that detects faces in images +use_case: Object Detection +tags: + - real-time + - quantized +source_repo: + https://github.com/quic/ai-hub-models/blob/main/qai_hub_models/models/face_det_lite/model.py +license: https://github.com/qcom-ai-hub/ai-hub-models-internal/blob/main/LICENSE +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +technical_details: + Model checkpoint: qfd360_sl_model.pt + Inference latency: RealTime + Input resolution: 480x640 +applicable_scenarios: + - Incabinet driver monitoring, phone unlocking, and building access control. +related_models: [] +form_factors: + - Phone + - Tablet + - IoT +has_static_banner: true +has_animated_banner: true +license_type: bsd-3-clause +deploy_license_type: AI Model Hub License +dataset: [] diff --git a/qai_hub_models/models/face_det_lite_quantized/model.py b/qai_hub_models/models/face_det_lite_quantized/model.py new file mode 100755 index 00000000..12d311c4 --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/model.py @@ -0,0 +1,78 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +# isort: off +# This verifies aimet is installed, and this must be included first.
class FaceDetLiteQuantizable(AIMETQuantizableMixin, FaceDetLite_model):
    """AIMET quantization-simulation wrapper around ``FaceDetLite_model``.

    The simulation is built with 8-bit parameters and 8-bit activations.
    Only pre-computed encodings can be loaded; calibrating with your own
    weights and data is not supported by this class yet.
    """

    def __init__(
        self,
        model: QuantizationSimModel,
    ) -> None:
        # The float base class receives the module wrapped by the sim, while
        # the AIMET mixin keeps the QuantizationSimModel itself.
        FaceDetLite_model.__init__(self, model.model)
        AIMETQuantizableMixin.__init__(self, model)

    @classmethod
    def from_pretrained(
        cls,
        aimet_encodings: str | None = "DEFAULT",
    ) -> FaceDetLiteQuantizable:
        """
        Build the quantization-simulated model from the pretrained float model.

        Parameters:
            aimet_encodings:
                "DEFAULT": fetch the default encodings file for this model
                    from the asset store and load it.
                None (or any falsy value): load no encodings, e.g. when the
                    encodings are about to be computed.
                anything else: treated as the path of an encodings file to load.

        Returns:
            A new instance wrapping the configured QuantizationSimModel.
        """
        float_model = FaceDetLite_model.from_pretrained()
        input_shape = cls.get_input_spec()["input"][0]

        # Prepare the graph for AIMET, then apply cross-layer equalization
        # in place before the simulation is created.
        prepared_model = prepare_model(float_model)
        equalize_model(prepared_model, input_shape)

        quant_sim = QuantizationSimModel(
            prepared_model,
            quant_scheme="tf_enhanced",
            default_param_bw=8,
            default_output_bw=8,
            config_file=get_default_aimet_config(),
            dummy_input=torch.rand(input_shape),
        )

        # Nothing to load: hand back the sim without encodings.
        if not aimet_encodings:
            return cls(quant_sim)

        encodings_path = aimet_encodings
        if encodings_path == "DEFAULT":
            default_asset = CachedWebModelAsset.from_asset_store(
                MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS
            )
            encodings_path = default_asset.fetch()
        load_encodings_to_sim(quant_sim, encodings_path)

        return cls(quant_sim)
universal_assets: + torchscript_onnx_tflite: mqv6415lm + torchscript_onnx: mqyv3z7xq + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 281.0 + throughput: 3558.7188612099644 + estimated_peak_memory_range: + min: 12288 + max: 8321224 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: j57yeqrq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 350.0 + throughput: 2857.1428571428573 + estimated_peak_memory_range: + min: 327680 + max: 21451096 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jglvyjve5 + job_status: Passed + torchscript_onnx: + inference_time: 623.0 + throughput: 1605.1364365971108 + estimated_peak_memory_range: + min: 16384 + max: 11538304 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: j57yeqyq5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T23:59:08Z' + - torchscript_onnx_tflite: + inference_time: 199.0 + throughput: 5025.125628140703 + estimated_peak_memory_range: + min: 12288 + max: 15740592 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jp4lyzrq5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 248.0 + throughput: 4032.2580645161293 + estimated_peak_memory_range: + min: 0 + max: 19288016 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: j56y8kyvp + job_status: Passed + torchscript_onnx: + inference_time: 473.0 + throughput: 2114.164904862579 + 
estimated_peak_memory_range: + min: 323584 + max: 38212032 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jp4lyzlq5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T23:59:09Z' + - torchscript_onnx_tflite: + inference_time: 170.0 + throughput: 5882.35294117647 + estimated_peak_memory_range: + min: 8192 + max: 12016096 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jpxklwoj5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 242.0 + throughput: 4132.231404958678 + estimated_peak_memory_range: + min: 0 + max: 16437024 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jp3jzyjxg + job_status: Passed + torchscript_onnx: + inference_time: 478.0 + throughput: 2092.050209205021 + estimated_peak_memory_range: + min: 61440 + max: 26046416 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: jpxklwkj5 + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T23:59:10Z' + - torchscript_onnx_tflite: + inference_time: 802.0 + throughput: 1246.8827930174564 + estimated_peak_memory_range: + min: 12288 + max: 15836448 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: j5mn0jxyp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1012.0 + throughput: 988.1422924901186 + 
estimated_peak_memory_range: + min: 12288 + max: 6902624 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jgo2lj24p + job_status: Passed + reference_device_info: + name: RB3 Gen 2 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS6490 Proxy + timestamp: '2024-12-11T23:58:57Z' + - torchscript_onnx_tflite: + inference_time: 6076.0 + throughput: 164.58196181698486 + estimated_peak_memory_range: + min: 172032 + max: 10193664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jgn6zj6v5 + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8250 Proxy + timestamp: '2024-12-11T23:58:46Z' + - torchscript_onnx_tflite: + inference_time: 279.0 + throughput: 3584.2293906810037 + estimated_peak_memory_range: + min: 0 + max: 3764560 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jprvlzvvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 331.0 + throughput: 3021.1480362537764 + estimated_peak_memory_range: + min: 335872 + max: 1487080 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jpv6lj675 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T23:58:59Z' + - torchscript_onnx_tflite: + inference_time: 3157.0 + throughput: 316.75641431738995 + estimated_peak_memory_range: + min: 16384 + max: 16598576 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 
+ layers_on_cpu: 0 + total_layers: 90 + job_id: jp2kr2kxp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3504.0 + throughput: 285.38812785388126 + estimated_peak_memory_range: + min: 253952 + max: 10871696 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jped7jd75 + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T23:59:01Z' + - torchscript_onnx_tflite: + inference_time: 279.0 + throughput: 3584.2293906810037 + estimated_peak_memory_range: + min: 12288 + max: 33792760 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jpy1o91rp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 337.0 + throughput: 2967.359050445104 + estimated_peak_memory_range: + min: 344064 + max: 1656912 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jgz3l13z5 + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T23:59:02Z' + - torchscript_onnx_tflite: + inference_time: 678.0 + throughput: 1474.9262536873157 + estimated_peak_memory_range: + min: 12288 + max: 10965360 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jp0zmnz25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 838.0 + throughput: 1193.3174224343675 + estimated_peak_memory_range: + min: 0 + max: 5778112 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 
122 + job_id: j5weljez5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T23:59:03Z' + - torchscript_onnx_tflite: + inference_time: 280.0 + throughput: 3571.4285714285716 + estimated_peak_memory_range: + min: 16384 + max: 4598384 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jp8qelqzp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 336.0 + throughput: 2976.190476190476 + estimated_peak_memory_range: + min: 339968 + max: 1825048 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jg9lz6lqg + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T23:59:05Z' + - torchscript_onnx_tflite: + inference_time: 606.0 + throughput: 1650.1650165016501 + estimated_peak_memory_range: + min: 40960 + max: 17261648 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: jgke2jeyg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 828.0 + throughput: 1207.729468599034 + estimated_peak_memory_range: + min: 323584 + max: 6261792 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jp14nr4kp + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T23:59:06Z' + - torchscript_onnx_tflite: + inference_time: 341.0 + throughput: 2932.551319648094 + estimated_peak_memory_range: + min: 12288 + 
max: 15838224 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 90 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 90 + job_id: j5q6lj67p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 418.0 + throughput: 2392.3444976076553 + estimated_peak_memory_range: + min: 323584 + max: 17685520 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jgdxdjxkp + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T23:59:07Z' + - torchscript_onnx_qnn: + inference_time: 436.0 + throughput: 2293.577981651376 + estimated_peak_memory_range: + min: 339968 + max: 339968 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 122 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 122 + job_id: jgjvrjv7g + job_status: Passed + torchscript_onnx: + inference_time: 698.0 + throughput: 1432.6647564469913 + estimated_peak_memory_range: + min: 2281472 + max: 2281472 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 86 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 86 + job_id: j5mn0jnyp + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T23:59:11Z' diff --git a/qai_hub_models/models/face_det_lite_quantized/test.py b/qai_hub_models/models/face_det_lite_quantized/test.py new file mode 100755 index 00000000..a869eb2e --- /dev/null +++ b/qai_hub_models/models/face_det_lite_quantized/test.py @@ -0,0 +1,60 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import ast + +import numpy as np + +from qai_hub_models.models.face_det_lite.app import FaceDetLiteApp +from qai_hub_models.models.face_det_lite.demo import main as demo_main +from qai_hub_models.models.face_det_lite.model import MODEL_ASSET_VERSION, MODEL_ID +from qai_hub_models.models.face_det_lite_quantized.model import FaceDetLiteQuantizable +from qai_hub_models.utils.args import ( + demo_model_from_cli_args, + get_model_cli_parser, + get_on_device_demo_parser, +) +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + load_image, + load_json, +) +from qai_hub_models.utils.testing import skip_clone_repo_check + +INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "test_640x480_Rooney.jpg" +) +OUTPUT_RST_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "outputs.json" +) + + +# Verify that the output from Torch is as expected. 
def test_task():
    """Check the quantizable model's detections against the stored reference.

    The model is built through the demo CLI helpers with default arguments
    and with ``aimet_encodings`` forced to ``None``. The app is run on the
    reference image and every returned detection is compared with the entry
    at the same position in the "bounding box" list of the oracle JSON.
    """
    parser = get_model_cli_parser(FaceDetLiteQuantizable)
    parser = get_on_device_demo_parser(parser, add_output_dir=True)
    parser.add_argument(
        "--image",
        type=str,
        default=INPUT_IMAGE_ADDRESS,
        help="image file path or URL",
    )
    # Parse an empty argv so only the parser defaults are used.
    args = parser.parse_args([])
    args.aimet_encodings = None  # TODO test, remove later
    model = demo_model_from_cli_args(FaceDetLiteQuantizable, MODEL_ID, args)
    app = FaceDetLiteApp(model)
    original_image = load_image(INPUT_IMAGE_ADDRESS)
    output_tensor = app.run_inference_on_image(original_image)
    output_tensor_oracle = load_json(OUTPUT_RST_ADDRESS)
    # The oracle stores the boxes as the string form of a Python literal.
    bounding_box_list = ast.literal_eval(output_tensor_oracle["bounding box"])

    # Without this check the loop below passes vacuously when nothing is
    # detected, and extra detections would only surface as an IndexError.
    assert len(output_tensor) == len(bounding_box_list), (
        f"expected {len(bounding_box_list)} detections, got {len(output_tensor)}"
    )

    for predicted, expected in zip(output_tensor, bounding_box_list):
        # Compare by mean *absolute* error: a signed mean lets positive and
        # negative errors cancel, and any large negative error would be
        # below the threshold.
        mean_abs_error = np.abs(np.array(predicted) - np.array(expected)).mean()
        assert mean_abs_error < 0.3
36805800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgn6o34m5 + job_id: j5mn07yyp job_status: Passed torchscript_onnx: - inference_time: 466.0 - throughput: 2145.922746781116 + inference_time: 454.0 + throughput: 2202.643171806167 estimated_peak_memory_range: - min: 249856 - max: 1606600 + min: 12288 + max: 13978304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgo2o3xdp + job_id: jp3jzy0xg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:31:23Z' + timestamp: '2024-12-11T23:58:14Z' - torchscript_onnx_tflite: - inference_time: 225.0 - throughput: 4444.444444444444 + inference_time: 237.0 + throughput: 4219.4092827004215 estimated_peak_memory_range: - min: 16384 - max: 9108384 + min: 0 + max: 12480032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jgz38rq65 + job_id: jgo2lxq4p job_status: Passed torchscript_onnx_qnn: - inference_time: 270.0 - throughput: 3703.703703703704 + inference_time: 280.0 + throughput: 3571.4285714285716 estimated_peak_memory_range: min: 0 - max: 11320048 + max: 15531008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jprvoereg + job_id: jgn6zjvv5 job_status: Passed torchscript_onnx: - inference_time: 382.0 - throughput: 2617.801047120419 + inference_time: 371.0 + throughput: 2695.4177897574123 estimated_peak_memory_range: min: 0 - max: 25888960 + max: 26389440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jpv6ev8m5 + job_id: 
jgo2lj64p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:31:24Z' + timestamp: '2024-12-11T23:58:15Z' - torchscript_onnx_tflite: - inference_time: 202.0 - throughput: 4950.495049504951 + inference_time: 219.0 + throughput: 4566.2100456621 estimated_peak_memory_range: - min: 0 - max: 8511184 + min: 12288 + max: 8682240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5we8q0j5 + job_id: jpv6l8x75 job_status: Passed torchscript_onnx_qnn: - inference_time: 241.0 - throughput: 4149.377593360996 + inference_time: 262.0 + throughput: 3816.793893129771 estimated_peak_memory_range: min: 0 - max: 8816368 + max: 8808480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp2k4l1mp + job_id: jprvlz3vg job_status: Passed torchscript_onnx: - inference_time: 368.0 - throughput: 2717.391304347826 + inference_time: 367.0 + throughput: 2724.7956403269754 estimated_peak_memory_range: min: 0 - max: 16577616 + max: 16259264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgjvoe98g + job_id: jpv6ljk75 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:31:25Z' + timestamp: '2024-12-11T23:58:17Z' - torchscript_onnx_tflite: - inference_time: 301.0 - throughput: 3322.2591362126245 + inference_time: 303.0 + throughput: 3300.3300330033003 estimated_peak_memory_range: min: 20480 - max: 3528496 + max: 58821128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 38 - job_id: jg9lkw7vg + job_id: jped7q375 job_status: Passed torchscript_onnx_qnn: - inference_time: 349.0 - throughput: 2865.3295128939826 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: min: 229376 - max: 1620864 + max: 1465336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jpy1q6l4p + job_id: jp2kr2yxp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:31:14Z' + timestamp: '2024-12-11T23:58:05Z' - torchscript_onnx_tflite: - inference_time: 6491.0 - throughput: 154.05946695424433 + inference_time: 6475.0 + throughput: 154.44015444015443 estimated_peak_memory_range: - min: 36864 - max: 10230848 + min: 16384 + max: 12656624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jp147eklp + job_id: j5welknz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6719.0 - throughput: 148.8316713796696 + inference_time: 6761.0 + throughput: 147.90711433219937 estimated_peak_memory_range: - min: 188416 - max: 5961856 + min: 143360 + max: 10701824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp8q6zn8p + job_id: jp0zmn025 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:31:16Z' + timestamp: '2024-12-11T23:58:08Z' - torchscript_onnx_tflite: - inference_time: 302.0 - throughput: 3311.2582781456954 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: min: 0 - max: 119214080 + max: 48208920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 
+295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jgdx8oylp + job_id: jp14n9xkp job_status: Passed torchscript_onnx_qnn: - inference_time: 352.0 - throughput: 2840.909090909091 + inference_time: 354.0 + throughput: 2824.858757062147 estimated_peak_memory_range: - min: 229376 - max: 1481520 + min: 237568 + max: 1628752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgkeo31og + job_id: jp8qelyzp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:31:18Z' + timestamp: '2024-12-11T23:58:09Z' - torchscript_onnx_tflite: - inference_time: 690.0 - throughput: 1449.2753623188405 + inference_time: 712.0 + throughput: 1404.4943820224719 estimated_peak_memory_range: min: 16384 - max: 7869184 + max: 7452304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j57ykxmr5 + job_id: jgdxdklkp job_status: Passed torchscript_onnx_qnn: - inference_time: 867.0 - throughput: 1153.4025374855826 + inference_time: 918.0 + throughput: 1089.3246187363834 estimated_peak_memory_range: - min: 212992 - max: 6233472 + min: 0 + max: 5878576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j5q6z3nmp + job_id: jgke2jxyg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:31:19Z' + timestamp: '2024-12-11T23:58:10Z' - torchscript_onnx_tflite: - inference_time: 293.0 - throughput: 3412.9692832764504 + inference_time: 304.0 + throughput: 3289.4736842105262 estimated_peak_memory_range: min: 0 - max: 212002496 + max: 111460688 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jp4lmv7l5 + job_id: j57yem3q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 350.0 - throughput: 2857.1428571428573 + inference_time: 353.0 + throughput: 2832.8611898016998 estimated_peak_memory_range: - min: 229376 - max: 1580608 + min: 270336 + max: 1550336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jglvo3dl5 + job_id: j5q6ljq7p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:31:20Z' + timestamp: '2024-12-11T23:58:11Z' - torchscript_onnx_tflite: - inference_time: 621.0 - throughput: 1610.3059581320451 + inference_time: 617.0 + throughput: 1620.7455429497568 estimated_peak_memory_range: min: 16384 - max: 7918976 + max: 13999808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jpxk3yq95 + job_id: jp4ly70q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 874.0 - throughput: 1144.1647597254005 + inference_time: 836.0 + throughput: 1196.1722488038276 estimated_peak_memory_range: - min: 0 - max: 5800480 + min: 217088 + max: 6450688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j56yrnx7p + job_id: jglvyjme5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:31:21Z' + timestamp: '2024-12-11T23:58:12Z' - torchscript_onnx_tflite: - inference_time: 422.0 - throughput: 2369.6682464454975 + inference_time: 426.0 + throughput: 2347.417840375587 estimated_peak_memory_range: - min: 16384 - 
max: 13970416 + min: 0 + max: 15623200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5mno37qp + job_id: jpxklq2j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 485.0 - throughput: 2061.855670103093 + inference_time: 483.0 + throughput: 2070.3933747412007 estimated_peak_memory_range: - min: 208896 - max: 12756832 + min: 0 + max: 17232032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp3jxedzg + job_id: j56y8k4vp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:31:22Z' + timestamp: '2024-12-11T23:58:13Z' - torchscript_onnx_qnn: - inference_time: 434.0 - throughput: 2304.147465437788 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 372736 - max: 372736 + min: 499712 + max: 499712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp0zdlwe5 + job_id: jpy1o93rp job_status: Passed torchscript_onnx: - inference_time: 4527.0 - throughput: 220.8968411751712 + inference_time: 458.0 + throughput: 2183.406113537118 estimated_peak_memory_range: - min: 12091392 - max: 12091392 + min: 12992512 + max: 12992512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jped8kq05 + job_id: jgjvrjn7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:31:26Z' + timestamp: '2024-12-11T23:58:18Z' diff --git a/qai_hub_models/models/facemap_3dmm_quantized/export.py 
b/qai_hub_models/models/facemap_3dmm_quantized/export.py index 0c812f54..f04101e4 100644 --- a/qai_hub_models/models/facemap_3dmm_quantized/export.py +++ b/qai_hub_models/models/facemap_3dmm_quantized/export.py @@ -41,7 +41,7 @@ def export_model( skip_downloading: bool = False, skip_summary: bool = False, output_dir: Optional[str] = None, - target_runtime: TargetRuntime = TargetRuntime.QNN, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, compile_options: str = "", profile_options: str = "", **additional_model_kwargs, @@ -207,7 +207,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False, supports_onnx=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facemap_3dmm_quantized/model.py b/qai_hub_models/models/facemap_3dmm_quantized/model.py index 84582ef7..50944b9d 100644 --- a/qai_hub_models/models/facemap_3dmm_quantized/model.py +++ b/qai_hub_models/models/facemap_3dmm_quantized/model.py @@ -21,7 +21,7 @@ MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 1 -DEFAULT_ENCODINGS = "facemap_3dmm_quantized_encodings.json" +DEFAULT_ENCODINGS = "facemap_3dmm_encodings.json" class FaceMap_3DMMQuantizable(AIMETQuantizableMixin, FaceMap_3DMM): @@ -62,7 +62,7 @@ def from_pretrained( model, quant_scheme="tf_enhanced", default_param_bw=8, - default_output_bw=16, + default_output_bw=8, config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) diff --git a/qai_hub_models/models/facemap_3dmm_quantized/perf.yaml b/qai_hub_models/models/facemap_3dmm_quantized/perf.yaml index 76817083..04d7ccf2 100644 --- a/qai_hub_models/models/facemap_3dmm_quantized/perf.yaml +++ b/qai_hub_models/models/facemap_3dmm_quantized/perf.yaml @@ -21,6 +21,8 @@ aggregated: - SA8775P ADP - QCS6490 (Proxy) - RB3 Gen 2 (Proxy) + - QCS8250 (Proxy) + - RB5 (Proxy) - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 
(Proxy) @@ -38,6 +40,7 @@ aggregated: - Snapdragon® X Plus 8-Core - SA8775P - QCS6490 Proxy + - QCS8250 Proxy - QCS8450 Proxy - QCS8550 Proxy - SA7255P @@ -46,14 +49,31 @@ aggregated: - SA8650P Proxy models: - name: Facial-Landmark-Detection-Quantized - universal_assets: {} + universal_assets: + torchscript_onnx_tflite: mmr36re2m + torchscript_onnx: mngg18rrn performance_metrics: - - torchscript_onnx_qnn: - inference_time: 237.0 - throughput: 4219.4092827004215 + - torchscript_onnx_tflite: + inference_time: 169.0 + throughput: 5917.15976331361 estimated_peak_memory_range: - min: 12288 - max: 5426704 + min: 0 + max: 98217040 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: j5q6lnkep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 232.0 + throughput: 4310.3448275862065 + estimated_peak_memory_range: + min: 28672 + max: 5792040 primary_compute_unit: NPU precision: int8 layer_info: @@ -61,7 +81,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgdx8omlp + job_id: jgdxdk96p + job_status: Passed + torchscript_onnx: + inference_time: 338.0 + throughput: 2958.579881656805 + estimated_peak_memory_range: + min: 122880 + max: 113703600 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + job_id: jpy1ol0rp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -70,13 +105,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:30:27Z' - - torchscript_onnx_qnn: - inference_time: 191.0 - throughput: 5235.602094240838 + timestamp: '2024-12-11T23:57:33Z' + - torchscript_onnx_tflite: + inference_time: 141.0 + throughput: 7092.198581560284 estimated_peak_memory_range: - min: 0 - max: 9524704 + min: 20480 + max: 10916512 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 
43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jglvydz25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 185.0 + throughput: 5405.405405405405 + estimated_peak_memory_range: + min: 61440 + max: 10631776 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +134,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: j57ykx1r5 + job_id: j5welkvz5 + job_status: Passed + torchscript_onnx: + inference_time: 271.0 + throughput: 3690.036900369004 + estimated_peak_memory_range: + min: 61440 + max: 30352704 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + job_id: jp0zmw725 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -93,13 +158,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:30:28Z' - - torchscript_onnx_qnn: - inference_time: 182.0 - throughput: 5494.505494505494 + timestamp: '2024-12-11T23:57:34Z' + - torchscript_onnx_tflite: + inference_time: 126.0 + throughput: 7936.507936507936 estimated_peak_memory_range: - min: 20480 - max: 9421808 + min: 12288 + max: 9398576 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: j56y8xjnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 176.0 + throughput: 5681.818181818182 + estimated_peak_memory_range: + min: 61440 + max: 9968960 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +187,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jp4lmv6l5 + job_id: jg9lzr1qg + job_status: Passed + torchscript_onnx: + inference_time: 274.0 + throughput: 3649.6350364963505 + estimated_peak_memory_range: + min: 0 + max: 18030944 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + 
job_id: jp8qenvzp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -116,13 +211,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:30:29Z' - - torchscript_onnx_qnn: - inference_time: 810.0 - throughput: 1234.567901234568 + timestamp: '2024-12-11T23:57:35Z' + - torchscript_onnx_tflite: + inference_time: 519.0 + throughput: 1926.7822736030828 estimated_peak_memory_range: min: 12288 - max: 7508608 + max: 12633664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jp3jzd3mg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 701.0 + throughput: 1426.5335235378031 + estimated_peak_memory_range: + min: 16384 + max: 7095152 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jpxk3y895 + job_id: jp14n9lkp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -139,13 +249,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:30:31Z' - - torchscript_onnx_qnn: - inference_time: 228.0 - throughput: 4385.964912280701 + timestamp: '2024-12-11T23:57:23Z' + - torchscript_onnx_tflite: + inference_time: 1870.0 + throughput: 534.75935828877 + estimated_peak_memory_range: + min: 12288 + max: 2638848 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jgo2lx01p + job_status: Passed + reference_device_info: + name: RB5 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8250 Proxy + timestamp: '2024-12-11T23:57:11Z' + - torchscript_onnx_tflite: + inference_time: 175.0 + throughput: 5714.285714285715 + estimated_peak_memory_range: + min: 16384 + max: 5985808 + primary_compute_unit: NPU + precision: int8 + 
layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jpv6l8oz5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 218.0 + throughput: 4587.155963302752 estimated_peak_memory_range: - min: 131072 - max: 1445808 + min: 77824 + max: 1289640 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: j5mno31qp + job_id: jgdxdk9kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -162,13 +310,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:30:32Z' - - torchscript_onnx_qnn: - inference_time: 1727.0 - throughput: 579.0387955993051 + timestamp: '2024-12-11T23:57:24Z' + - torchscript_onnx_tflite: + inference_time: 1191.0 + throughput: 839.6305625524769 + estimated_peak_memory_range: + min: 0 + max: 12384064 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jgjvr9m1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1415.0 + throughput: 706.7137809187279 estimated_peak_memory_range: - min: 49152 - max: 5733008 + min: 12288 + max: 10423712 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jprvoemeg + job_id: jp4ly7oq5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -185,13 +348,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:30:34Z' - - torchscript_onnx_qnn: - inference_time: 230.0 - throughput: 4347.826086956522 + timestamp: '2024-12-11T23:57:26Z' + - torchscript_onnx_tflite: + inference_time: 173.0 + throughput: 5780.346820809248 + estimated_peak_memory_range: + min: 0 + max: 5898144 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 
+ total_layers: 43 + job_id: jped7q185 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 219.0 + throughput: 4566.2100456621 estimated_peak_memory_range: - min: 131072 - max: 1667832 + min: 86016 + max: 1357064 primary_compute_unit: NPU precision: int8 layer_info: @@ -199,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jp2k4lqmp + job_id: jpxklqjj5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -208,13 +386,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:30:35Z' - - torchscript_onnx_qnn: - inference_time: 661.0 - throughput: 1512.8593040847202 + timestamp: '2024-12-11T23:57:28Z' + - torchscript_onnx_tflite: + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 0 - max: 5773488 + min: 16384 + max: 8096528 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jgz3l6945 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 780.0 + throughput: 1282.051282051282 + estimated_peak_memory_range: + min: 24576 + max: 5857344 primary_compute_unit: NPU precision: int8 layer_info: @@ -222,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jpy1q6k4p + job_id: j5mn072yp job_status: Passed reference_device_info: name: SA8295P ADP @@ -231,13 +424,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:30:36Z' - - torchscript_onnx_qnn: - inference_time: 228.0 - throughput: 4385.964912280701 + timestamp: '2024-12-11T23:57:29Z' + - torchscript_onnx_tflite: + inference_time: 169.0 + throughput: 5917.15976331361 + estimated_peak_memory_range: + min: 28672 + max: 5921616 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: j5welkv45 + job_status: Passed + 
torchscript_onnx_qnn: + inference_time: 219.0 + throughput: 4566.2100456621 estimated_peak_memory_range: - min: 126976 - max: 1295048 + min: 81920 + max: 1559632 primary_compute_unit: NPU precision: int8 layer_info: @@ -245,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jp0zdl8e5 + job_id: jgn6z48v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -254,13 +462,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:30:38Z' - - torchscript_onnx_qnn: - inference_time: 677.0 - throughput: 1477.1048744460857 + timestamp: '2024-12-11T23:57:30Z' + - torchscript_onnx_tflite: + inference_time: 382.0 + throughput: 2617.801047120419 estimated_peak_memory_range: - min: 0 - max: 5818784 + min: 20480 + max: 12087568 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jg9lzr1mg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 581.0 + throughput: 1721.170395869191 + estimated_peak_memory_range: + min: 12288 + max: 6186368 primary_compute_unit: NPU precision: int8 layer_info: @@ -268,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jp8q6zd8p + job_id: jprvlrjvg job_status: Passed reference_device_info: name: SA8775P ADP @@ -277,13 +500,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:30:39Z' - - torchscript_onnx_qnn: - inference_time: 283.0 - throughput: 3533.5689045936397 + timestamp: '2024-12-11T23:57:31Z' + - torchscript_onnx_tflite: + inference_time: 217.0 + throughput: 4608.294930875576 estimated_peak_memory_range: - min: 0 - max: 12316256 + min: 16384 + max: 12116432 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 43 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 43 + job_id: jp14n9lnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 280.0 
+ throughput: 3571.4285714285716 + estimated_peak_memory_range: + min: 65536 + max: 16156272 primary_compute_unit: NPU precision: int8 layer_info: @@ -291,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgkeo3wog + job_id: jp2kr1nxp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -300,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:30:41Z' + timestamp: '2024-12-11T23:57:32Z' - torchscript_onnx_qnn: - inference_time: 335.0 - throughput: 2985.0746268656717 + inference_time: 298.0 + throughput: 3355.7046979865772 estimated_peak_memory_range: - min: 565248 - max: 565248 + min: 684032 + max: 684032 primary_compute_unit: NPU precision: int8 layer_info: @@ -314,7 +552,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgn6o3dm5 + job_id: j57yemwq5 + job_status: Passed + torchscript_onnx: + inference_time: 353.0 + throughput: 2832.8611898016998 + estimated_peak_memory_range: + min: 7798784 + max: 7798784 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 76 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 76 + job_id: jgke21myg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -323,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:30:33Z' + timestamp: '2024-12-11T23:57:36Z' diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index 26712fa3..50d9f645 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -43,7 +43,7 @@ def export_model( skip_downloading: bool = False, skip_summary: bool = False, output_dir: Optional[str] = None, - target_runtime: TargetRuntime = TargetRuntime.QNN, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, compile_options: str = "", profile_options: str = "", **additional_model_kwargs, @@ 
-208,7 +208,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 0ce57210..32bbbf08 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -44,14 +44,15 @@ aggregated: models: - name: FastSam-S universal_assets: - torchscript_onnx: mno303dgn + torchscript_onnx_tflite: mqyv3z39q + torchscript_onnx: mm6kvdvvn performance_metrics: - - torchscript_onnx_qnn: - inference_time: 8104.0 - throughput: 123.39585389930899 + - torchscript_onnx_tflite: + inference_time: 7484.0 + throughput: 133.61838588989846 estimated_peak_memory_range: - min: 4939776 - max: 22645296 + min: 4214784 + max: 30534032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -59,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jpxk3yz95 + job_id: jprvlrzkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8076.0 + throughput: 123.82367508667657 + estimated_peak_memory_range: + min: 4968448 + max: 19446448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jgo2lxy1p job_status: Passed torchscript_onnx: - inference_time: 9535.0 - throughput: 104.8767697954903 + inference_time: 9516.0 + throughput: 105.08617065994115 estimated_peak_memory_range: - min: 3317760 - max: 24958928 + min: 3301376 + max: 25489096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -74,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp3jxerzg + job_id: jpxklqv85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,13 +99,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 
2 - timestamp: '2024-11-26T02:29:50Z' - - torchscript_onnx_qnn: - inference_time: 5994.0 - throughput: 166.8335001668335 + timestamp: '2024-12-11T23:56:36Z' + - torchscript_onnx_tflite: + inference_time: 5514.0 + throughput: 181.35654697134567 + estimated_peak_memory_range: + min: 2985984 + max: 42260992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jp2kr126p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5990.0 + throughput: 166.9449081803005 estimated_peak_memory_range: - min: 70504448 - max: 108815856 + min: 4931584 + max: 43165360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -97,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgn6o3wm5 + job_id: jpv6l83z5 job_status: Passed torchscript_onnx: - inference_time: 6600.0 - throughput: 151.5151515151515 + inference_time: 6599.0 + throughput: 151.53811183512653 estimated_peak_memory_range: - min: 15421440 - max: 101424016 + min: 16580608 + max: 105485120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -112,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgo2o39dp + job_id: j5mn07r7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:29:51Z' - - torchscript_onnx_qnn: - inference_time: 5490.0 - throughput: 182.14936247723134 + timestamp: '2024-12-11T23:56:37Z' + - torchscript_onnx_tflite: + inference_time: 5067.0 + throughput: 197.35543714229328 estimated_peak_memory_range: - min: 4927488 - max: 37049216 + min: 4194304 + max: 37861600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -135,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jprvoe7eg + job_id: jpy1ol90p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 
5501.0 + throughput: 181.78512997636793 + estimated_peak_memory_range: + min: 4931584 + max: 37871488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jgjvr9x1g job_status: Passed torchscript_onnx: - inference_time: 5363.0 - throughput: 186.46280067126608 + inference_time: 5364.0 + throughput: 186.42803877703207 estimated_peak_memory_range: - min: 17149952 - max: 67150512 + min: 13389824 + max: 65001760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -150,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpv6evnm5 + job_id: jgn6z42j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:29:52Z' - - torchscript_onnx_qnn: - inference_time: 7622.0 - throughput: 131.19916032537392 + timestamp: '2024-12-11T23:56:38Z' + - torchscript_onnx_tflite: + inference_time: 7515.0 + throughput: 133.06719893546241 estimated_peak_memory_range: - min: 4956160 - max: 6113272 + min: 4542464 + max: 39114656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -173,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp2k4lzmp + job_id: jp0zmwy05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7645.0 + throughput: 130.80444735120994 + estimated_peak_memory_range: + min: 4980736 + max: 6346576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jped7q985 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -182,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:29:40Z' - - torchscript_onnx_qnn: - inference_time: 257585.0 - throughput: 3.882213638216511 + timestamp: '2024-12-11T23:56:26Z' + - 
torchscript_onnx_tflite: + inference_time: 256715.0 + throughput: 3.8953703523362484 estimated_peak_memory_range: - min: 4931584 - max: 10424128 + min: 5210112 + max: 38633136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp0zdlxe5 + job_id: jp8qenoqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 257465.0 + throughput: 3.8840230710970425 + estimated_peak_memory_range: + min: 647168 + max: 11309616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: j5welko45 job_status: Passed reference_device_info: name: SA7255P ADP @@ -205,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:29:43Z' - - torchscript_onnx_qnn: - inference_time: 7649.0 - throughput: 130.73604392731076 + timestamp: '2024-12-11T23:56:29Z' + - torchscript_onnx_tflite: + inference_time: 7543.0 + throughput: 132.57324671881216 estimated_peak_memory_range: - min: 4960256 - max: 10278720 + min: 4538368 + max: 39333088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jp8q6zk8p + job_id: jgke21zvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7776.0 + throughput: 128.6008230452675 + estimated_peak_memory_range: + min: 4988928 + max: 6246784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jg9lzrvmg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -228,13 +319,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:29:44Z' - - torchscript_onnx_qnn: - inference_time: 13964.0 - throughput: 71.61271841879118 + timestamp: '2024-12-11T23:56:30Z' + - torchscript_onnx_tflite: + 
inference_time: 14439.0 + throughput: 69.25687374471916 + estimated_peak_memory_range: + min: 4554752 + max: 35073760 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: j5q6ln8ep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14574.0 + throughput: 68.61534239055852 estimated_peak_memory_range: - min: 53248 - max: 6072720 + min: 57344 + max: 5951504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgkeo3kog + job_id: jp14n90np job_status: Passed reference_device_info: name: SA8295P ADP @@ -251,13 +357,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:29:45Z' - - torchscript_onnx_qnn: - inference_time: 7680.0 - throughput: 130.20833333333334 + timestamp: '2024-12-11T23:56:31Z' + - torchscript_onnx_tflite: + inference_time: 7600.0 + throughput: 131.57894736842104 estimated_peak_memory_range: - min: 4988928 - max: 6490360 + min: 4542464 + max: 32983088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jglvydn25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7782.0 + throughput: 128.5016705217168 + estimated_peak_memory_range: + min: 4952064 + max: 6205440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j5q6z3dmp + job_id: jgdxdkw6p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -274,13 +395,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:29:46Z' - - torchscript_onnx_qnn: - inference_time: 14140.0 - throughput: 70.72135785007072 + timestamp: '2024-12-11T23:56:32Z' + - torchscript_onnx_tflite: + inference_time: 13822.0 + throughput: 
72.34843003906815 + estimated_peak_memory_range: + min: 4554752 + max: 38472672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: j56y8x6np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14189.0 + throughput: 70.47713017125943 estimated_peak_memory_range: - min: 2007040 - max: 7734528 + min: 2273280 + max: 8066096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jglvo3ql5 + job_id: j57yemzn5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -297,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:29:47Z' - - torchscript_onnx_qnn: - inference_time: 14048.0 - throughput: 71.18451025056947 + timestamp: '2024-12-11T23:56:34Z' + - torchscript_onnx_tflite: + inference_time: 13182.0 + throughput: 75.86102260658474 estimated_peak_memory_range: - min: 4947968 - max: 39393536 + min: 3784704 + max: 42749792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -311,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j56yrn07p + job_id: jp3jzdkmg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14030.0 + throughput: 71.27583749109051 + estimated_peak_memory_range: + min: 4931584 + max: 42392256 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jp4ly7q25 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -320,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:29:49Z' - - torchscript_onnx: - inference_time: 9897.0 - throughput: 101.0407194099222 + timestamp: '2024-12-11T23:56:35Z' + - torchscript_onnx_qnn: + inference_time: 8189.0 + throughput: 122.11503236048358 + 
estimated_peak_memory_range: + min: 4923392 + max: 4923392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 286 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 286 + job_id: jgz3l6e45 + job_status: Passed + torchscript_onnx: + inference_time: 9852.0 + throughput: 101.50223304912708 estimated_peak_memory_range: - min: 21348352 - max: 21348352 + min: 22867968 + max: 22867968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -334,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgjvoe88g + job_id: jprvlrkkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -343,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:29:53Z' + timestamp: '2024-12-11T23:56:39Z' diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index 64eabb3e..8e175822 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -43,7 +43,7 @@ def export_model( skip_downloading: bool = False, skip_summary: bool = False, output_dir: Optional[str] = None, - target_runtime: TargetRuntime = TargetRuntime.QNN, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, compile_options: str = "", profile_options: str = "", **additional_model_kwargs, @@ -208,7 +208,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_tflite=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index 570eccfd..fe44d52f 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -44,14 +44,30 @@ aggregated: models: - name: FastSam-X universal_assets: - torchscript_onnx: mnwe1edrn + torchscript_onnx_tflite: mn1wzrz4m + 
torchscript_onnx: mmd3y2y0n performance_metrics: - - torchscript_onnx_qnn: - inference_time: 45029.0 - throughput: 22.207910457705033 + - torchscript_onnx_tflite: + inference_time: 47109.0 + throughput: 21.227366320660597 estimated_peak_memory_range: - min: 4927488 - max: 21008608 + min: 4587520 + max: 56641064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jgdxdkk6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 45612.0 + throughput: 21.924055073226345 + estimated_peak_memory_range: + min: 8429568 + max: 24712336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -59,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: j57ykxoq5 + job_id: jp8qenlqp job_status: Passed torchscript_onnx: - inference_time: 46786.0 - throughput: 21.373915273799856 + inference_time: 47961.0 + throughput: 20.850274181105483 estimated_peak_memory_range: - min: 94208 - max: 165034184 + min: 0 + max: 165518072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -74,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jglvo38e5 + job_id: j5welkj45 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:29:01Z' - - torchscript_onnx_qnn: - inference_time: 34740.0 - throughput: 28.785261945883708 + timestamp: '2024-12-11T23:55:47Z' + - torchscript_onnx_tflite: + inference_time: 34705.0 + throughput: 28.814291888776832 estimated_peak_memory_range: - min: 4931584 - max: 69038736 + min: 2605056 + max: 66082304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -97,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jp4lmveq5 + job_id: j57yemmn5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 34385.0 + throughput: 
29.08244874218409 + estimated_peak_memory_range: + min: 0 + max: 60979312 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jgke21jvg job_status: Passed torchscript_onnx: - inference_time: 35254.0 - throughput: 28.365575537527658 + inference_time: 35036.0 + throughput: 28.54207101267268 estimated_peak_memory_range: - min: 14872576 - max: 180953664 + min: 0 + max: 167378480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -112,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j56yrnmvp + job_id: jg9lzr6mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:29:02Z' - - torchscript_onnx_qnn: - inference_time: 30954.0 - throughput: 32.306002455256184 + timestamp: '2024-12-11T23:55:48Z' + - torchscript_onnx_tflite: + inference_time: 31064.0 + throughput: 32.19160442956477 estimated_peak_memory_range: - min: 3059712 - max: 63632416 + min: 3915776 + max: 64516336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -135,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jpxk3y0j5 + job_id: jp4ly7725 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 30789.0 + throughput: 32.47913215758875 + estimated_peak_memory_range: + min: 4927488 + max: 65028320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: j5q6lnjep job_status: Passed torchscript_onnx: - inference_time: 31566.0 - throughput: 31.679655325350062 + inference_time: 31618.0 + throughput: 31.62755392497944 estimated_peak_memory_range: - min: 15454208 - max: 96815280 + min: 14610432 + max: 97625040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -150,7 +196,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 421 - job_id: jp3jxe7xg + job_id: jp14n9rnp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,13 +205,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:29:03Z' - - torchscript_onnx_qnn: - inference_time: 42922.0 - throughput: 23.298075578957178 + timestamp: '2024-12-11T23:55:49Z' + - torchscript_onnx_tflite: + inference_time: 47110.0 + throughput: 21.226915729144554 + estimated_peak_memory_range: + min: 3620864 + max: 52297816 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jpxklqq85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 43338.0 + throughput: 23.074438137431354 estimated_peak_memory_range: - min: 5058560 - max: 6354144 + min: 5066752 + max: 8508016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -173,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jgn6o3qv5 + job_id: jglvydj25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -182,13 +243,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:28:52Z' - - torchscript_onnx_qnn: - inference_time: 2095966.0 - throughput: 0.47710697597193846 + timestamp: '2024-12-11T23:55:38Z' + - torchscript_onnx_tflite: + inference_time: 2097849.0 + throughput: 0.47667873140535855 + estimated_peak_memory_range: + min: 208896 + max: 62914192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: j5mn0777p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2095808.0 + throughput: 0.4771429443918527 estimated_peak_memory_range: - min: 3821568 - max: 9143408 + min: 3166208 + max: 13952880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +272,7 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 418 - job_id: jp2k4ldxp + job_id: jp3jzdymg job_status: Passed reference_device_info: name: SA7255P ADP @@ -205,13 +281,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:28:54Z' - - torchscript_onnx_qnn: - inference_time: 44325.0 - throughput: 22.560631697687537 + timestamp: '2024-12-11T23:55:40Z' + - torchscript_onnx_tflite: + inference_time: 45518.0 + throughput: 21.9693308141834 + estimated_peak_memory_range: + min: 4575232 + max: 54841504 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jgn6z44j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 43083.0 + throughput: 23.211011303762504 estimated_peak_memory_range: - min: 5046272 - max: 10472704 + min: 5066752 + max: 6369928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jpy1q62rp + job_id: jgo2lxj1p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -228,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:28:55Z' - - torchscript_onnx_qnn: - inference_time: 82797.0 - throughput: 12.07773228498617 + timestamp: '2024-12-11T23:55:41Z' + - torchscript_onnx_tflite: + inference_time: 93866.0 + throughput: 10.653484754863316 estimated_peak_memory_range: - min: 1372160 - max: 7065216 + min: 4554752 + max: 62495248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jp0zdl925 + job_id: jprvlrrkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 82776.0 + throughput: 12.080796366096454 + estimated_peak_memory_range: + min: 4644864 + max: 10716432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + 
layers_on_cpu: 0 + total_layers: 418 + job_id: jpv6l8jz5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -251,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:28:56Z' - - torchscript_onnx_qnn: - inference_time: 43338.0 - throughput: 23.074438137431354 + timestamp: '2024-12-11T23:55:43Z' + - torchscript_onnx_tflite: + inference_time: 46394.0 + throughput: 21.554511359227487 estimated_peak_memory_range: - min: 5074944 - max: 6392960 + min: 3858432 + max: 56298736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -265,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jp8q6zrzp + job_id: jp2kr116p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 43710.0 + throughput: 22.878059940517044 + estimated_peak_memory_range: + min: 5099520 + max: 6377520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jgjvr9j1g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -274,13 +395,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:28:57Z' - - torchscript_onnx_qnn: - inference_time: 85641.0 - throughput: 11.676650202589881 + timestamp: '2024-12-11T23:55:44Z' + - torchscript_onnx_tflite: + inference_time: 87602.0 + throughput: 11.415264491678272 + estimated_peak_memory_range: + min: 4644864 + max: 66907152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jpy1oll0p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 85644.0 + throughput: 11.676241184437906 estimated_peak_memory_range: - min: 892928 - max: 6830672 + min: 2379776 + max: 13150112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -288,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: 
jgkeo30yg + job_id: jped7qj85 job_status: Passed reference_device_info: name: SA8775P ADP @@ -297,13 +433,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:28:59Z' - - torchscript_onnx_qnn: - inference_time: 89247.0 - throughput: 11.20485842661378 + timestamp: '2024-12-11T23:55:45Z' + - torchscript_onnx_tflite: + inference_time: 93467.0 + throughput: 10.698963270459092 + estimated_peak_memory_range: + min: 4550656 + max: 68956640 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 418 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 418 + job_id: jp0zmwn05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 99489.0 + throughput: 10.051362462181748 estimated_peak_memory_range: - min: 4141056 - max: 64376928 + min: 3842048 + max: 68462144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -311,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: j5q6z317p + job_id: jgz3l6145 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -320,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:29:00Z' + timestamp: '2024-12-11T23:55:46Z' - torchscript_onnx_qnn: - inference_time: 44450.0 - throughput: 22.49718785151856 + inference_time: 44471.0 + throughput: 22.48656427784399 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -334,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 418 - job_id: jprvoedvg + job_id: j56y8xknp job_status: Passed torchscript_onnx: - inference_time: 49475.0 - throughput: 20.212228398180898 + inference_time: 49543.0 + throughput: 20.18448620390368 estimated_peak_memory_range: - min: 146210816 - max: 146210816 + min: 147189760 + max: 147189760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -349,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jgo2o3w4p + job_id: jgdxdkj6p job_status: 
Passed reference_device_info: name: Snapdragon X Elite CRD @@ -358,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:29:04Z' + timestamp: '2024-12-11T23:55:50Z' diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index ae4b3e62..e276defa 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FCN-ResNet50 universal_assets: - torchscript_onnx_tflite: mq24z4wlm - torchscript_onnx: mnlvgvrkm + torchscript_onnx_tflite: mn1wzdl8m + torchscript_onnx: mqp3zl60m performance_metrics: - torchscript_onnx_tflite: - inference_time: 48668.0 - throughput: 20.54738226349963 + inference_time: 48467.0 + throughput: 20.632595374172116 estimated_peak_memory_range: - min: 348160 - max: 18332752 + min: 319488 + max: 20373480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jp2k4lrxp + job_id: jgjvr96eg job_status: Passed torchscript_onnx_qnn: - inference_time: 48277.0 - throughput: 20.713797460488433 + inference_time: 48469.0 + throughput: 20.63174400132043 estimated_peak_memory_range: min: 3305472 - max: 22007280 + max: 22312480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgjvoer7g + job_id: j57yem1n5 job_status: Passed torchscript_onnx: - inference_time: 43588.0 - throughput: 22.94209415435441 + inference_time: 43370.0 + throughput: 23.05741295826608 estimated_peak_memory_range: - min: 73728 - max: 82361352 + min: 3330048 + max: 837490592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jgn6o31v5 + job_id: j5q6lnnep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ 
-99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:28:06Z' + timestamp: '2024-12-11T23:54:54Z' - torchscript_onnx_tflite: - inference_time: 35692.0 - throughput: 28.017482909335424 + inference_time: 35460.0 + throughput: 28.20078962210942 estimated_peak_memory_range: - min: 319488 - max: 97326224 + min: 323584 + max: 97973376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jpy1q6orp + job_id: jped7q0v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 35984.0 - throughput: 27.790128946198312 + inference_time: 35575.0 + throughput: 28.109627547434997 estimated_peak_memory_range: - min: 3280896 - max: 99311568 + min: 3309568 + max: 95680192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jped8k775 + job_id: jp4ly7625 job_status: Passed torchscript_onnx: - inference_time: 33635.0 - throughput: 29.730935037906942 + inference_time: 33800.0 + throughput: 29.585798816568047 estimated_peak_memory_range: - min: 0 - max: 227418096 + min: 3887104 + max: 231527648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jprvoexvg + job_id: jglvydd25 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:28:07Z' + timestamp: '2024-12-11T23:54:55Z' - torchscript_onnx_tflite: - inference_time: 34871.0 - throughput: 28.67712425797941 + inference_time: 29676.0 + throughput: 33.69726378218089 estimated_peak_memory_range: - min: 290816 - max: 91655808 + min: 299008 + max: 92892976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 
0 total_layers: 88 - job_id: jp0zdlm25 + job_id: jgz3l6qx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 29555.0 - throughput: 33.835222466587716 + inference_time: 34714.0 + throughput: 28.80682145532062 estimated_peak_memory_range: min: 3260416 - max: 94908192 + max: 94696976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgz38rlz5 + job_id: jpxklq885 job_status: Passed torchscript_onnx: - inference_time: 32448.0 - throughput: 30.81854043392505 + inference_time: 31645.0 + throughput: 31.600568810238585 estimated_peak_memory_range: - min: 3817472 - max: 153633312 + min: 3633152 + max: 152854832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp2k4loxp + job_id: j56y8xxnp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:28:08Z' + timestamp: '2024-12-11T23:54:56Z' - torchscript_onnx_tflite: - inference_time: 48109.0 - throughput: 20.786131493067824 + inference_time: 48482.0 + throughput: 20.62621178994266 estimated_peak_memory_range: - min: 65536 - max: 18806488 + min: 389120 + max: 19245488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jgkeo32yg + job_id: j5welk0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 43504.0 - throughput: 22.986392055902904 + inference_time: 43714.0 + throughput: 22.87596650958503 estimated_peak_memory_range: - min: 3371008 - max: 4641432 + min: 3391488 + max: 4674552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j5we8qlz5 + job_id: j5mn0717p job_status: Passed reference_device_info: 
name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:27:57Z' + timestamp: '2024-12-11T23:54:46Z' - torchscript_onnx_tflite: - inference_time: 2068721.0 - throughput: 0.48339046202943753 + inference_time: 2068649.0 + throughput: 0.4834072865913937 estimated_peak_memory_range: - min: 81920 - max: 90545984 + min: 364544 + max: 91050400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: j5q6z3l7p + job_id: jg9lzr78g job_status: Passed torchscript_onnx_qnn: - inference_time: 2060867.0 - throughput: 0.48523267149214383 + inference_time: 2060980.0 + throughput: 0.48520606701666197 estimated_peak_memory_range: - min: 487424 - max: 7584720 + min: 1466368 + max: 11765824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp147enkp + job_id: jprvlrmkg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:27:59Z' + timestamp: '2024-12-11T23:54:48Z' - torchscript_onnx_tflite: - inference_time: 48503.0 - throughput: 20.6172814052739 + inference_time: 48154.0 + throughput: 20.766706815633178 estimated_peak_memory_range: - min: 118784 - max: 18042224 + min: 331776 + max: 18832992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jglvo3ye5 + job_id: jp14n9k7p job_status: Passed torchscript_onnx_qnn: - inference_time: 44059.0 - throughput: 22.696838330420572 + inference_time: 43515.0 + throughput: 22.98058140870964 estimated_peak_memory_range: - min: 4292608 - max: 5548192 + min: 3416064 + max: 5071568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 129 - job_id: jgdx8odkp + job_id: jp2kr1q6p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:28:00Z' + timestamp: '2024-12-11T23:54:49Z' - torchscript_onnx_tflite: - inference_time: 84801.0 - throughput: 11.792313769884789 + inference_time: 84779.0 + throughput: 11.795373854374315 estimated_peak_memory_range: - min: 352256 - max: 40631728 + min: 307200 + max: 41507728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: j56yrn8vp + job_id: j5welk045 job_status: Passed torchscript_onnx_qnn: - inference_time: 81195.0 - throughput: 12.316029312149762 + inference_time: 81213.0 + throughput: 12.313299594892444 estimated_peak_memory_range: - min: 192512 - max: 6134592 + min: 5103616 + max: 11157408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j57ykxeq5 + job_id: jpy1olk0p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:28:01Z' + timestamp: '2024-12-11T23:54:50Z' - torchscript_onnx_tflite: - inference_time: 48852.0 - throughput: 20.469990993203965 + inference_time: 48620.0 + throughput: 20.567667626491154 estimated_peak_memory_range: - min: 348160 - max: 20290696 + min: 45056 + max: 18607608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jp3jxezxg + job_id: jg9lzr7mg job_status: Passed torchscript_onnx_qnn: - inference_time: 43640.0 - throughput: 22.9147571035747 + inference_time: 43591.0 + throughput: 22.940515243972378 estimated_peak_memory_range: - min: 4100096 - max: 5351272 + min: 3432448 + max: 
4872544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp4lmvyq5 + job_id: jp0zmww05 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:28:02Z' + timestamp: '2024-12-11T23:54:51Z' - torchscript_onnx_tflite: - inference_time: 95278.0 - throughput: 10.495602342618444 + inference_time: 95279.0 + throughput: 10.495492186106068 estimated_peak_memory_range: - min: 303104 - max: 90458816 + min: 389120 + max: 91499648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jgo2o3l4p + job_id: jp14n9knp job_status: Passed torchscript_onnx_qnn: - inference_time: 89441.0 - throughput: 11.18055477912814 + inference_time: 89449.0 + throughput: 11.179554830126664 estimated_peak_memory_range: - min: 221184 - max: 5898064 + min: 856064 + max: 6901216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jpxk3ylj5 + job_id: jp8qennqp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:28:03Z' + timestamp: '2024-12-11T23:54:52Z' - torchscript_onnx_tflite: - inference_time: 78386.0 - throughput: 12.757380144413544 + inference_time: 76934.0 + throughput: 12.998154262094783 estimated_peak_memory_range: - min: 319488 - max: 48672208 + min: 327680 + max: 49624192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 88 - job_id: jpv6evl75 + job_id: jgdxdky6p job_status: Passed torchscript_onnx_qnn: - inference_time: 75750.0 - throughput: 13.201320132013201 + inference_time: 74974.0 + 
throughput: 13.337957158481608 estimated_peak_memory_range: - min: 3301376 - max: 50081872 + min: 3260416 + max: 48273104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j5mno30yp + job_id: jgke211vg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:28:05Z' + timestamp: '2024-12-11T23:54:53Z' - torchscript_onnx_qnn: - inference_time: 44221.0 - throughput: 22.613690328124648 + inference_time: 44218.0 + throughput: 22.615224569179972 estimated_peak_memory_range: min: 3256320 max: 3256320 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jg9lkwzqg + job_id: jgn6z4dj5 job_status: Passed torchscript_onnx: - inference_time: 45093.0 - throughput: 22.17639101412636 + inference_time: 45236.0 + throughput: 22.10628702803077 estimated_peak_memory_range: - min: 66424832 - max: 66424832 + min: 67706880 + max: 67706880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp0zdlo25 + job_id: jp3jzddmg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:28:09Z' + timestamp: '2024-12-11T23:54:57Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py b/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py index fc2aad39..f0121548 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=400, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git 
a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 80467c55..2a4edbe7 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -237,7 +237,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index fa48b80c..d2f472e0 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: FCN-ResNet50-Quantized universal_assets: - torchscript_onnx_tflite: mnjxklekq + torchscript_onnx_tflite: mqp3z0d0m performance_metrics: - torchscript_onnx_tflite: - inference_time: 15139.0 - throughput: 66.05456106744171 + inference_time: 15286.0 + throughput: 65.41933795629988 estimated_peak_memory_range: - min: 36864 - max: 18690664 + min: 57344 + max: 14731632 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: j56yrqwnp + job_id: jgz3l9ex5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17116.0 - throughput: 58.424865622809065 + inference_time: 17160.0 + throughput: 58.27505827505828 estimated_peak_memory_range: - min: 122880 - max: 18581384 + min: 28672 + max: 23482040 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp4lmj325 + job_id: j5mn02r7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 2 - timestamp: '2024-11-26T04:04:19Z' + timestamp: '2024-12-12T01:32:44Z' - torchscript_onnx_tflite: - inference_time: 11353.0 - throughput: 88.08244516867788 + inference_time: 11370.0 + throughput: 87.95074758135443 estimated_peak_memory_range: - min: 303104 - max: 47387536 + min: 307200 + max: 49812624 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jp3jxq6mg + job_id: j5welvom5 job_status: Passed torchscript_onnx_qnn: - inference_time: 12899.0 - throughput: 77.52538956508256 + inference_time: 12760.0 + throughput: 78.36990595611286 estimated_peak_memory_range: min: 827392 - max: 43836704 + max: 46154816 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jpxk3ex85 + job_id: jgn6zy2j5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:04:21Z' + timestamp: '2024-12-12T01:32:47Z' - torchscript_onnx_tflite: - inference_time: 9425.0 - throughput: 106.10079575596816 + inference_time: 11119.0 + throughput: 89.93614533681087 estimated_peak_memory_range: - min: 8192 - max: 42143920 + min: 282624 + max: 45011968 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jgo2oe81p + job_id: jg9lz1v8g job_status: Passed torchscript_onnx_qnn: - inference_time: 10790.0 - throughput: 92.67840593141798 + inference_time: 12781.0 + throughput: 78.24113919098662 estimated_peak_memory_range: min: 823296 - max: 42564512 + max: 45992272 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: j5mnov87p + job_id: jprvlqkkg job_status: Passed reference_device_info: name: Snapdragon 8 
Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:04:24Z' + timestamp: '2024-12-12T01:32:49Z' - torchscript_onnx_qnn: - inference_time: 139127.0 - throughput: 7.187677445786943 + inference_time: 138979.0 + throughput: 7.195331668813274 estimated_peak_memory_range: - min: 1490944 - max: 9204160 + min: 1359872 + max: 9014208 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,7 +179,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jgn6orkj5 + job_id: jp2kr686p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -188,13 +188,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:03:59Z' + timestamp: '2024-12-12T01:32:51Z' - torchscript_onnx_tflite: - inference_time: 1383080.0 - throughput: 0.7230239754750267 + inference_time: 1391254.0 + throughput: 0.7187760107068875 estimated_peak_memory_range: - min: 90714112 - max: 166178224 + min: 90972160 + max: 107590112 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 11 layers_on_cpu: 12 total_layers: 90 - job_id: jgjvokq1g + job_id: jgdxd9wzp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T04:03:34Z' + timestamp: '2024-12-12T01:32:26Z' - torchscript_onnx_tflite: - inference_time: 15186.0 - throughput: 65.85012511523772 + inference_time: 15167.0 + throughput: 65.93261686556339 estimated_peak_memory_range: - min: 389120 - max: 16529704 + min: 286720 + max: 18636696 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jped84y85 + job_id: j5welvo45 job_status: Passed torchscript_onnx_qnn: - inference_time: 14114.0 - throughput: 70.85163667280715 + inference_time: 14315.0 + 
throughput: 69.856793573175 estimated_peak_memory_range: - min: 888832 - max: 2011320 + min: 864256 + max: 2418688 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jprvo1wkg + job_id: jpy1owe0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:04:01Z' + timestamp: '2024-12-12T01:32:54Z' - torchscript_onnx_tflite: - inference_time: 270259.0 - throughput: 3.700154296434161 + inference_time: 270399.0 + throughput: 3.6982385289886426 estimated_peak_memory_range: - min: 270336 - max: 41413360 + min: 36864 + max: 40310512 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,14 +263,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jgz38vn45 + job_id: jg9lz1vmg job_status: Passed torchscript_onnx_qnn: - inference_time: 268117.0 - throughput: 3.729715012475897 + inference_time: 268175.0 + throughput: 3.728908362077002 estimated_peak_memory_range: - min: 1146880 - max: 6977808 + min: 827392 + max: 11508624 primary_compute_unit: NPU precision: int8 layer_info: @@ -278,7 +278,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jpy1qvm0p + job_id: jp8qe99qp job_status: Passed reference_device_info: name: SA7255P ADP @@ -287,13 +287,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:04:05Z' + timestamp: '2024-12-12T01:32:58Z' - torchscript_onnx_tflite: - inference_time: 15144.0 - throughput: 66.03275224511357 + inference_time: 15185.0 + throughput: 65.85446163977609 estimated_peak_memory_range: - min: 294912 - max: 18725408 + min: 53248 + max: 24967272 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,14 +301,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: j5we8m445 + job_id: jp14nl0np job_status: Passed 
torchscript_onnx_qnn: - inference_time: 14237.0 - throughput: 70.23951675212474 + inference_time: 14284.0 + throughput: 70.00840100812097 estimated_peak_memory_range: - min: 880640 - max: 2781480 + min: 913408 + max: 2208824 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,7 +316,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp0zdej05 + job_id: jgke2nnvg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -325,13 +325,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:04:08Z' + timestamp: '2024-12-12T01:33:00Z' - torchscript_onnx_tflite: - inference_time: 22346.0 - throughput: 44.75073838718339 + inference_time: 22363.0 + throughput: 44.71671958145151 estimated_peak_memory_range: - min: 307200 - max: 40572656 + min: 311296 + max: 44149408 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,14 +339,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jg9lk9dmg + job_id: jgdxd9w6p job_status: Passed torchscript_onnx_qnn: - inference_time: 21046.0 - throughput: 47.51496721467262 + inference_time: 21062.0 + throughput: 47.47887190200361 estimated_peak_memory_range: - min: 856064 - max: 6705104 + min: 909312 + max: 6827696 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,7 +354,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp8q6wxqp + job_id: j5q6lkkep job_status: Passed reference_device_info: name: SA8295P ADP @@ -363,13 +363,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:04:10Z' + timestamp: '2024-12-12T01:33:03Z' - torchscript_onnx_tflite: - inference_time: 15187.0 - throughput: 65.8457891617831 + inference_time: 15171.0 + throughput: 65.91523301034869 estimated_peak_memory_range: - min: 81920 - max: 24825128 + min: 61440 + max: 21135000 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,14 +377,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 90 - job_id: jp147q6np + job_id: j57yewzn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14208.0 - throughput: 70.38288288288288 + inference_time: 14256.0 + throughput: 70.14590347923681 estimated_peak_memory_range: - min: 671744 - max: 2475832 + min: 880640 + max: 2011400 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,7 +392,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jgkeor4vg + job_id: jglvyzz25 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -401,13 +401,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:04:12Z' + timestamp: '2024-12-12T01:33:05Z' - torchscript_onnx_tflite: - inference_time: 18770.0 - throughput: 53.27650506126798 + inference_time: 18800.0 + throughput: 53.191489361702125 estimated_peak_memory_range: - min: 286720 - max: 41323408 + min: 307200 + max: 42196144 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,14 +415,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: jgdx8726p + job_id: jp4lyoq25 job_status: Passed torchscript_onnx_qnn: - inference_time: 17772.0 - throughput: 56.268287193337834 + inference_time: 17766.0 + throughput: 56.28729032984352 estimated_peak_memory_range: - min: 860160 - max: 6852864 + min: 0 + max: 6249776 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,7 +430,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: j5q6z9yep + job_id: j56y8jjnp job_status: Passed reference_device_info: name: SA8775P ADP @@ -439,13 +439,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:04:15Z' + timestamp: '2024-12-12T01:33:07Z' - torchscript_onnx_tflite: - inference_time: 17903.0 - throughput: 55.856560353013464 + inference_time: 17846.0 + throughput: 56.03496581867085 estimated_peak_memory_range: - min: 307200 - max: 51643632 + min: 12288 + max: 52590160 
primary_compute_unit: NPU precision: int8 layer_info: @@ -453,14 +453,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 90 - job_id: j57ykv9n5 + job_id: jpxkljv85 job_status: Passed torchscript_onnx_qnn: - inference_time: 19761.0 - throughput: 50.60472648145337 + inference_time: 19970.0 + throughput: 50.0751126690035 estimated_peak_memory_range: - min: 1085440 - max: 46433568 + min: 847872 + max: 51413728 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,7 +468,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: j56yrq7np + job_id: jp3jz33mg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -477,10 +477,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:04:17Z' + timestamp: '2024-12-12T01:33:10Z' - torchscript_onnx_qnn: - inference_time: 14577.0 - throughput: 68.60122110173562 + inference_time: 14632.0 + throughput: 68.3433570256971 estimated_peak_memory_range: min: 815104 max: 815104 @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 130 - job_id: jp2k43e6p + job_id: jp0zmqq05 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -500,4 +500,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:04:26Z' + timestamp: '2024-12-12T01:32:56Z' diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index edfa1c4f..d1cb8c80 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FFNet-122NS-LowRes universal_assets: - torchscript_onnx_tflite: mqy3w38vm - torchscript_onnx: mn7lkl14q + torchscript_onnx_tflite: mq36e5d6q + torchscript_onnx: mnz1vl0dq performance_metrics: - torchscript_onnx_tflite: - inference_time: 7159.0 - throughput: 139.6843134515994 + inference_time: 7207.0 + 
throughput: 138.75398917718886 estimated_peak_memory_range: - min: 36864 - max: 85514480 + min: 16384 + max: 78559704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jgdx8o2kp + job_id: j5q6ln1op job_status: Passed torchscript_onnx_qnn: - inference_time: 7251.0 - throughput: 137.91201213625706 + inference_time: 7103.0 + throughput: 140.78558355624384 estimated_peak_memory_range: - min: 6311936 - max: 29832416 + min: 6316032 + max: 37926696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgkeo34yg + job_id: jg9lzrq8g job_status: Passed torchscript_onnx: - inference_time: 8241.0 - throughput: 121.34449702705982 + inference_time: 8372.0 + throughput: 119.44577161968466 estimated_peak_memory_range: - min: 20480 - max: 62565168 + min: 16384 + max: 956263952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jg9lkwmqg + job_id: jp0zmwxn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:27:13Z' + timestamp: '2024-12-11T23:54:02Z' - torchscript_onnx_tflite: - inference_time: 5031.0 - throughput: 198.76764062810574 + inference_time: 5015.0 + throughput: 199.40179461615153 estimated_peak_memory_range: - min: 315392 - max: 26544736 + min: 466944 + max: 26889344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j57ykx9q5 + job_id: jglvydqm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4944.0 - throughput: 202.26537216828478 + inference_time: 4925.0 + throughput: 203.0456852791878 estimated_peak_memory_range: - min: 6217728 - max: 31348688 + min: 
5922816 + max: 32553792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j5q6z3y7p + job_id: jp14n9m7p job_status: Passed torchscript_onnx: - inference_time: 6045.0 - throughput: 165.42597187758477 + inference_time: 5962.0 + throughput: 167.7289500167729 estimated_peak_memory_range: - min: 7602176 - max: 106790160 + min: 1024000 + max: 93385984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jp147ejkp + job_id: jp8qenkop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:27:14Z' + timestamp: '2024-12-11T23:54:03Z' - torchscript_onnx_tflite: - inference_time: 4876.0 - throughput: 205.08613617719442 + inference_time: 4919.0 + throughput: 203.29335230737954 estimated_peak_memory_range: - min: 651264 - max: 24893280 + min: 634880 + max: 26350400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jp4lmv3q5 + job_id: j56y8x0yp job_status: Passed torchscript_onnx_qnn: - inference_time: 4074.0 - throughput: 245.4590083456063 + inference_time: 4076.0 + throughput: 245.3385672227674 estimated_peak_memory_range: - min: 5943296 - max: 28208320 + min: 6303744 + max: 30686368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jglvo3xe5 + job_id: jgdxdkmzp job_status: Passed torchscript_onnx: - inference_time: 5593.0 - throughput: 178.79492222420882 + inference_time: 5747.0 + throughput: 174.00382808421784 estimated_peak_memory_range: - min: 7589888 - max: 57055200 + min: 7585792 + max: 51799104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jgdx8o3kp + job_id: jgke21kng job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:27:15Z' + timestamp: '2024-12-11T23:54:04Z' - torchscript_onnx_tflite: - inference_time: 7221.0 - throughput: 138.48497438027974 + inference_time: 7218.0 + throughput: 138.54253255749515 estimated_peak_memory_range: min: 45056 - max: 65043936 + max: 68497848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jpxk3yxj5 + job_id: jp3jzdrng job_status: Passed torchscript_onnx_qnn: - inference_time: 6681.0 - throughput: 149.678191887442 + inference_time: 6666.0 + throughput: 150.01500150015002 estimated_peak_memory_range: - min: 6356992 - max: 7795208 + min: 6373376 + max: 7595624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j56yrn7vp + job_id: j57yem895 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:27:03Z' + timestamp: '2024-12-11T23:53:52Z' - torchscript_onnx_tflite: - inference_time: 207136.0 - throughput: 4.827746021937278 + inference_time: 207371.0 + throughput: 4.822275052924469 estimated_peak_memory_range: - min: 630784 - max: 22518448 + min: 704512 + max: 24407184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j5mno38yp + job_id: jgo2lx9kp job_status: Passed torchscript_onnx_qnn: - inference_time: 206150.0 - throughput: 4.8508367693427115 + inference_time: 206112.0 + throughput: 4.851731097655644 estimated_peak_memory_range: - min: 2248704 - 
max: 7480304 + min: 155648 + max: 10602384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgo2o3r4p + job_id: jpxklqzl5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:27:05Z' + timestamp: '2024-12-11T23:53:55Z' - torchscript_onnx_tflite: - inference_time: 7310.0 - throughput: 136.79890560875512 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: - min: 16384 - max: 77303632 + min: 49152 + max: 76930824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jgn6o3nv5 + job_id: jpv6l8nr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6689.0 - throughput: 149.49917775452235 + inference_time: 6692.0 + throughput: 149.43215780035862 estimated_peak_memory_range: - min: 6373376 - max: 7693896 + min: 6377472 + max: 7705952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jpv6evd75 + job_id: j5mn07l9p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:27:06Z' + timestamp: '2024-12-11T23:53:56Z' - torchscript_onnx_tflite: - inference_time: 12615.0 - throughput: 79.27070947284979 + inference_time: 12578.0 + throughput: 79.50389569088885 estimated_peak_memory_range: min: 655360 - max: 19001568 + max: 21187216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jprvoe0vg + job_id: jgjvr98eg job_status: Passed torchscript_onnx_qnn: - inference_time: 10983.0 - throughput: 91.04980424292087 + inference_time: 
11231.0 + throughput: 89.03926631644555 estimated_peak_memory_range: - min: 61440 - max: 6078176 + min: 6279168 + max: 12346240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgjvoe77g + job_id: jgn6z4wq5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:27:08Z' + timestamp: '2024-12-11T23:53:57Z' - torchscript_onnx_tflite: - inference_time: 7291.0 - throughput: 137.1553970648745 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: - min: 16384 - max: 56666592 + min: 32768 + max: 68638808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jp2k4lwxp + job_id: jped7qnv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6800.0 - throughput: 147.05882352941177 + inference_time: 6836.0 + throughput: 146.2843768285547 estimated_peak_memory_range: min: 6332416 - max: 7667448 + max: 7549264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jped8kz75 + job_id: jprvlr77g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:27:09Z' + timestamp: '2024-12-11T23:53:58Z' - torchscript_onnx_tflite: - inference_time: 12750.0 - throughput: 78.43137254901961 + inference_time: 12800.0 + throughput: 78.125 estimated_peak_memory_range: - min: 0 - max: 21770992 + min: 667648 + max: 24876176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jpy1q6xrp + job_id: jgz3l60x5 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 12211.0 - throughput: 81.89337482597658 + inference_time: 12213.0 + throughput: 81.87996397281586 estimated_peak_memory_range: - min: 69632 - max: 5793168 + min: 2232320 + max: 8315024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jgz38rmz5 + job_id: jp2kr1zqp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:27:10Z' + timestamp: '2024-12-11T23:54:00Z' - torchscript_onnx_tflite: - inference_time: 10874.0 - throughput: 91.96247930844216 + inference_time: 10813.0 + throughput: 92.48127254231018 estimated_peak_memory_range: - min: 638976 - max: 23716352 + min: 663552 + max: 26437584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jp0zdlj25 + job_id: j5welkrm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 11156.0 - throughput: 89.63786303334528 + inference_time: 10984.0 + throughput: 91.04151493080845 estimated_peak_memory_range: - min: 0 - max: 23664720 + min: 6311936 + max: 31106464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: j5we8q7z5 + job_id: jpy1olylp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:27:11Z' + timestamp: '2024-12-11T23:54:01Z' - torchscript_onnx_qnn: - inference_time: 7039.0 - throughput: 142.06563432305725 + inference_time: 7058.0 + throughput: 141.68319637291017 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jp3jxe9xg + job_id: jp4ly7215 job_status: Passed 
torchscript_onnx: - inference_time: 7290.0 - throughput: 137.17421124828533 + inference_time: 7195.0 + throughput: 138.9854065323141 estimated_peak_memory_range: - min: 59990016 - max: 59990016 + min: 61366272 + max: 61366272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j57ykx4q5 + job_id: j5q6lndop job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:27:16Z' + timestamp: '2024-12-11T23:54:05Z' diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 00cdef7f..194f8c58 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FFNet-40S universal_assets: - torchscript_onnx_tflite: mmdy8yzvm - torchscript_onnx: mqkkxko7q + torchscript_onnx_tflite: mn4l10zrq + torchscript_onnx: mq9lp960q performance_metrics: - torchscript_onnx_tflite: - inference_time: 17197.0 - throughput: 58.14967726929115 + inference_time: 16962.0 + throughput: 58.95531187359981 estimated_peak_memory_range: - min: 2539520 - max: 26409584 + min: 2527232 + max: 30671112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jped8kl85 + job_id: jprvlrx7g job_status: Passed torchscript_onnx_qnn: - inference_time: 17457.0 - throughput: 57.28361115884746 + inference_time: 17515.0 + throughput: 57.09391949757351 estimated_peak_memory_range: - min: 25300992 - max: 48523272 + min: 2121728 + max: 28038776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgn6o37j5 + job_id: jgo2lxwkp job_status: Passed torchscript_onnx: - inference_time: 24917.0 - 
throughput: 40.13324236465064 + inference_time: 24980.0 + throughput: 40.0320256204964 estimated_peak_memory_range: - min: 25231360 - max: 56434136 + min: 25235456 + max: 27537000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jpv6ev7z5 + job_id: jpxklqrl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:26:18Z' + timestamp: '2024-12-11T23:53:08Z' - torchscript_onnx_tflite: - inference_time: 12420.0 - throughput: 80.51529790660226 + inference_time: 12386.0 + throughput: 80.73631519457452 estimated_peak_memory_range: - min: 2019328 - max: 38587808 + min: 1388544 + max: 38861488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jgz38r445 + job_id: jp2kr1oqp job_status: Passed torchscript_onnx_qnn: - inference_time: 12560.0 - throughput: 79.61783439490446 + inference_time: 12578.0 + throughput: 79.50389569088885 estimated_peak_memory_range: - min: 24100864 - max: 62377024 + min: 25092096 + max: 64037392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jprvoenkg + job_id: jpv6l8mr5 job_status: Passed torchscript_onnx: - inference_time: 18387.0 - throughput: 54.38625115570784 + inference_time: 18482.0 + throughput: 54.10669840926307 estimated_peak_memory_range: - min: 32583680 - max: 167129392 + min: 32731136 + max: 165854064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgjvoeq1g + job_id: j5mn07k9p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:26:19Z' + timestamp: '2024-12-11T23:53:09Z' - torchscript_onnx_tflite: - inference_time: 11867.0 - throughput: 84.26729586247578 + inference_time: 11840.0 + throughput: 84.45945945945945 estimated_peak_memory_range: - min: 1753088 - max: 36857376 + min: 2318336 + max: 39198976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j5we8q145 + job_id: jpy1ol8lp job_status: Passed torchscript_onnx_qnn: - inference_time: 12103.0 - throughput: 82.62414277451872 + inference_time: 12152.0 + throughput: 82.29098090849243 estimated_peak_memory_range: - min: 25178112 - max: 58797008 + min: 24731648 + max: 58692416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp2k4lv6p + job_id: jgjvr9yeg job_status: Passed torchscript_onnx: - inference_time: 17580.0 - throughput: 56.882821387940844 + inference_time: 17552.0 + throughput: 56.97356426618049 estimated_peak_memory_range: - min: 17063936 - max: 72793792 + min: 19845120 + max: 77318464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jped8ky85 + job_id: jgn6z4qq5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:26:20Z' + timestamp: '2024-12-11T23:53:10Z' - torchscript_onnx_tflite: - inference_time: 17226.0 - throughput: 58.05178218971322 + inference_time: 17273.0 + throughput: 57.8938227291148 estimated_peak_memory_range: - min: 2523136 - max: 27781904 + min: 2531328 + max: 27118736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jg9lkwxmg + job_id: jp0zmwon5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 16404.0 - throughput: 60.960741282613995 + inference_time: 16398.0 + throughput: 60.983046713013785 estimated_peak_memory_range: min: 25243648 - max: 26966992 + max: 26873248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jpy1q670p + job_id: jped7qxv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:26:08Z' + timestamp: '2024-12-11T23:52:57Z' - torchscript_onnx_tflite: - inference_time: 761739.0 - throughput: 1.312785612919911 + inference_time: 761554.0 + throughput: 1.3131045204936223 estimated_peak_memory_range: - min: 16384 - max: 32901024 + min: 32768 + max: 33388432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jp147evnp + job_id: jp8qenjop job_status: Passed torchscript_onnx_qnn: - inference_time: 760489.0 - throughput: 1.31494341141029 + inference_time: 760494.0 + throughput: 1.3149347660862545 estimated_peak_memory_range: - min: 25231360 - max: 30981216 + min: 25214976 + max: 35589856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp8q6z1qp + job_id: j5welkzm5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:26:10Z' + timestamp: '2024-12-11T23:53:00Z' - torchscript_onnx_tflite: - inference_time: 17358.0 - throughput: 57.61032377001959 + inference_time: 17592.0 + throughput: 56.84402000909504 estimated_peak_memory_range: - min: 2605056 - max: 28225192 + min: 2199552 + max: 27212744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jgdx8oz6p + job_id: jgke216ng job_status: Passed torchscript_onnx_qnn: - inference_time: 16483.0 - throughput: 60.668567615118604 + inference_time: 16428.0 + throughput: 60.871682493304114 estimated_peak_memory_range: - min: 25247744 - max: 26663344 + min: 25231360 + max: 26415736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j5q6z3vep + job_id: jg9lzr28g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:26:12Z' + timestamp: '2024-12-11T23:53:01Z' - torchscript_onnx_tflite: - inference_time: 31672.0 - throughput: 31.573629704470825 + inference_time: 31658.0 + throughput: 31.58759239370775 estimated_peak_memory_range: - min: 2523136 - max: 32474544 + min: 2510848 + max: 34077584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j57ykx7n5 + job_id: j5q6ln4op job_status: Passed torchscript_onnx_qnn: - inference_time: 30812.0 - throughput: 32.454887706088535 + inference_time: 30724.0 + throughput: 32.54784533263898 estimated_peak_memory_range: - min: 25182208 - max: 31228096 + min: 25214976 + max: 31105680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jglvo3l25 + job_id: jp14n917p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:26:13Z' + timestamp: '2024-12-11T23:53:03Z' - torchscript_onnx_tflite: - inference_time: 17000.0 - throughput: 58.8235294117647 + inference_time: 17138.0 + throughput: 58.34986579530867 estimated_peak_memory_range: - min: 2670592 - max: 
30361776 + min: 2166784 + max: 29829360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jp4lmv925 + job_id: jglvyd8m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 16976.0 - throughput: 58.906691800188504 + inference_time: 16609.0 + throughput: 60.20832078993317 estimated_peak_memory_range: - min: 25264128 - max: 26586096 + min: 25268224 + max: 26516496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j56yrnwnp + job_id: jgdxdk4zp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:26:14Z' + timestamp: '2024-12-11T23:53:04Z' - torchscript_onnx_tflite: - inference_time: 35171.0 - throughput: 28.432515424639618 + inference_time: 35147.0 + throughput: 28.451930463481947 estimated_peak_memory_range: - min: 2527232 - max: 35396912 + min: 2506752 + max: 38048384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jpxk3yd85 + job_id: j56y8xmyp job_status: Passed torchscript_onnx_qnn: - inference_time: 34558.0 - throughput: 28.936859771977545 + inference_time: 34551.0 + throughput: 28.94272235246447 estimated_peak_memory_range: - min: 25202688 - max: 30957440 + min: 25227264 + max: 31512880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp3jxe6mg + job_id: j57yemn95 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:26:15Z' + timestamp: '2024-12-11T23:53:05Z' - torchscript_onnx_tflite: - inference_time: 29619.0 - throughput: 
33.76211215773659 + inference_time: 28848.0 + throughput: 34.66444814198558 estimated_peak_memory_range: - min: 2551808 - max: 34921184 + min: 2535424 + max: 38298112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: j5mno3d7p + job_id: jp3jzd7ng job_status: Passed torchscript_onnx_qnn: - inference_time: 28633.0 - throughput: 34.924737191352634 + inference_time: 28856.0 + throughput: 34.65483781535902 estimated_peak_memory_range: - min: 15585280 - max: 50016944 + min: 13074432 + max: 44986688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgo2o381p + job_id: jp4ly7415 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:26:16Z' + timestamp: '2024-12-11T23:53:07Z' - torchscript_onnx_qnn: - inference_time: 16405.0 - throughput: 60.9570252971655 + inference_time: 16579.0 + throughput: 60.31726883406719 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -485,11 +485,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp0zdl605 + job_id: jgz3l6yx5 job_status: Passed torchscript_onnx: - inference_time: 30666.0 - throughput: 32.60940455227288 + inference_time: 30680.0 + throughput: 32.59452411994785 estimated_peak_memory_range: min: 25223168 max: 25223168 @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgz38rn45 + job_id: jprvlrd7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:26:21Z' + timestamp: '2024-12-11T23:53:11Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/info.yaml 
b/qai_hub_models/models/ffnet_40s_quantized/info.yaml index cc8799fe..28b2df33 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Semantic Segmentation tags: - quantized - real-time +imsdk_supported: true research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index c968c895..4d58be5a 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: FFNet-40S-Quantized universal_assets: - torchscript_onnx_tflite: mmx7z762q - torchscript_onnx: mqy3w3g5m + torchscript_onnx_tflite: mm6kv796n + torchscript_onnx: mqv64lk0m performance_metrics: - torchscript_onnx_tflite: - inference_time: 4079.0 - throughput: 245.15812699190977 + inference_time: 4157.0 + throughput: 240.55809477988933 estimated_peak_memory_range: - min: 503808 - max: 6581496 + min: 638976 + max: 12180192 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j5q6z3zep + job_id: jp14n9n7p job_status: Passed torchscript_onnx: - inference_time: 8719.0 - throughput: 114.69205184080744 + inference_time: 8709.0 + throughput: 114.82374555057986 estimated_peak_memory_range: - min: 614400 - max: 12321200 + min: 40960 + max: 11631144 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: j5q6z3mep + job_id: jp14n9o7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 
- timestamp: '2024-11-26T02:25:29Z' + timestamp: '2024-12-11T23:52:19Z' - torchscript_onnx_tflite: - inference_time: 2966.0 - throughput: 337.1544167228591 + inference_time: 2956.0 + throughput: 338.29499323410016 estimated_peak_memory_range: - min: 339968 - max: 27108080 + min: 462848 + max: 30175968 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +104,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jglvo3o25 + job_id: jgdxdkdzp job_status: Passed torchscript_onnx: - inference_time: 5911.0 - throughput: 169.1761123329386 + inference_time: 6101.0 + throughput: 163.90755613833798 estimated_peak_memory_range: - min: 7626752 - max: 118168736 + min: 7655424 + max: 120877360 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +119,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jglvo3125 + job_id: jgdxdk6zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +128,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:25:30Z' + timestamp: '2024-12-11T23:52:20Z' - torchscript_onnx_tflite: - inference_time: 2943.0 - throughput: 339.7893306150187 + inference_time: 2470.0 + throughput: 404.8582995951417 estimated_peak_memory_range: - min: 319488 - max: 24741792 + min: 634880 + max: 28169472 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +142,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j56yrnrnp + job_id: j57yeme95 job_status: Passed torchscript_onnx: - inference_time: 6246.0 - throughput: 160.1024655779699 + inference_time: 5959.0 + throughput: 167.81339150864238 estimated_peak_memory_range: - min: 7622656 - max: 63809728 + min: 5869568 + max: 64542672 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +157,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: j56yrndnp + job_id: j57yemo95 job_status: Passed reference_device_info: 
name: Snapdragon 8 Elite QRD @@ -166,13 +166,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:25:31Z' + timestamp: '2024-12-11T23:52:21Z' - torchscript_onnx_tflite: - inference_time: 26695.0 - throughput: 37.46019853905226 + inference_time: 27461.0 + throughput: 36.41527985142566 estimated_peak_memory_range: - min: 679936 - max: 33345168 + min: 675840 + max: 34371568 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,7 +180,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jp3jxexmg + job_id: jp4ly7y15 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -189,13 +189,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:25:05Z' + timestamp: '2024-12-11T23:51:56Z' - torchscript_onnx_tflite: - inference_time: 189116.0 - throughput: 5.287759893398761 + inference_time: 190546.0 + throughput: 5.248076579933454 estimated_peak_memory_range: - min: 966656 - max: 3164248 + min: 675840 + max: 3216568 primary_compute_unit: NPU precision: int8 layer_info: @@ -203,7 +203,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgo2o3o1p + job_id: jpxklqll5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -212,13 +212,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:25:07Z' + timestamp: '2024-12-11T23:51:57Z' - torchscript_onnx_tflite: - inference_time: 4083.0 - throughput: 244.91795248591723 + inference_time: 4068.0 + throughput: 245.82104228121926 estimated_peak_memory_range: - min: 638976 - max: 13033008 + min: 651264 + max: 10493064 primary_compute_unit: NPU precision: int8 layer_info: @@ -226,7 +226,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jpv6evez5 + job_id: j5mn0709p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -235,13 +235,13 @@ models: os_name: Android manufacturer: 
Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:25:08Z' + timestamp: '2024-12-11T23:51:58Z' - torchscript_onnx_tflite: - inference_time: 95873.0 - throughput: 10.430465303057169 + inference_time: 96022.0 + throughput: 10.414280060819396 estimated_peak_memory_range: - min: 712704 - max: 23964992 + min: 720896 + max: 24878672 primary_compute_unit: NPU precision: int8 layer_info: @@ -249,7 +249,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgjvoeo1g + job_id: jgn6z4zq5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -258,13 +258,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:25:09Z' + timestamp: '2024-12-11T23:51:59Z' - torchscript_onnx_tflite: - inference_time: 4132.0 - throughput: 242.0135527589545 + inference_time: 4097.0 + throughput: 244.081034903588 estimated_peak_memory_range: - min: 40960 - max: 12956904 + min: 49152 + max: 13287408 primary_compute_unit: NPU precision: int8 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jped8k885 + job_id: jprvlrl7g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:25:10Z' + timestamp: '2024-12-11T23:52:00Z' - torchscript_onnx_tflite: - inference_time: 8069.0 - throughput: 123.93109431156277 + inference_time: 8007.0 + throughput: 124.89072061945798 estimated_peak_memory_range: - min: 655360 - max: 22154416 + min: 659456 + max: 21994240 primary_compute_unit: NPU precision: int8 layer_info: @@ -295,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jgz38r845 + job_id: jp2kr1rqp job_status: Passed reference_device_info: name: SA8295P ADP @@ -304,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:25:11Z' + timestamp: '2024-12-11T23:52:01Z' - 
torchscript_onnx_tflite: - inference_time: 4173.0 - throughput: 239.63575365444524 + inference_time: 4104.0 + throughput: 243.66471734892787 estimated_peak_memory_range: - min: 647168 - max: 10321464 + min: 663552 + max: 13911264 primary_compute_unit: NPU precision: int8 layer_info: @@ -318,7 +318,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: j5we8q845 + job_id: jpy1ololp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -327,13 +327,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:25:12Z' + timestamp: '2024-12-11T23:52:02Z' - torchscript_onnx_tflite: - inference_time: 5767.0 - throughput: 173.40038148083926 + inference_time: 5807.0 + throughput: 172.20595832615808 estimated_peak_memory_range: - min: 655360 - max: 24707888 + min: 659456 + max: 27052016 primary_compute_unit: NPU precision: int8 layer_info: @@ -341,7 +341,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jg9lkwkmg + job_id: jp0zmwmn5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -350,13 +350,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:25:13Z' + timestamp: '2024-12-11T23:52:03Z' - torchscript_onnx_tflite: - inference_time: 5169.0 - throughput: 193.46101760495262 + inference_time: 5041.0 + throughput: 198.37333862328904 estimated_peak_memory_range: - min: 36864 - max: 30177120 + min: 659456 + max: 35013600 primary_compute_unit: NPU precision: int8 layer_info: @@ -364,7 +364,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 99 - job_id: jp147e7np + job_id: jp8qeneop job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -373,13 +373,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:25:14Z' + timestamp: '2024-12-11T23:52:04Z' - torchscript_onnx: - inference_time: 8722.0 - throughput: 114.65260261407934 + inference_time: 8778.0 + 
throughput: 113.9211665527455 estimated_peak_memory_range: - min: 9502720 - max: 9502720 + min: 11018240 + max: 11018240 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +387,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 168 - job_id: jp3jxewmg + job_id: jp4ly7e15 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -396,4 +396,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:25:32Z' + timestamp: '2024-12-11T23:52:22Z' diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 10fa4c7c..0c7df58f 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FFNet-54S universal_assets: - torchscript_onnx_tflite: mqkkxk11q - torchscript_onnx: mq8kxkevq + torchscript_onnx_tflite: mmxe7lzkn + torchscript_onnx: mmd3ye8kn performance_metrics: - torchscript_onnx_tflite: - inference_time: 19845.0 - throughput: 50.390526581002774 + inference_time: 21129.0 + throughput: 47.32831653178096 estimated_peak_memory_range: - min: 2547712 - max: 29261616 + min: 2543616 + max: 26843248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j5mno3v7p + job_id: jgjvr97vg job_status: Passed torchscript_onnx_qnn: - inference_time: 20072.0 - throughput: 49.82064567556795 + inference_time: 20452.0 + throughput: 48.89497359671426 estimated_peak_memory_range: - min: 25210880 - max: 48857856 + min: 25227264 + max: 49260648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j56yrnvnp + job_id: j5mn07mdp job_status: Passed torchscript_onnx: - inference_time: 33996.0 - throughput: 29.415225320625957 + inference_time: 33752.0 + throughput: 29.627873903768666 
estimated_peak_memory_range: - min: 25260032 - max: 40153232 + min: 25395200 + max: 27613728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jpxk3ym85 + job_id: jp3jzd93g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:24:13Z' + timestamp: '2024-12-11T23:51:08Z' - torchscript_onnx_tflite: - inference_time: 14403.0 - throughput: 69.42997986530584 + inference_time: 14932.0 + throughput: 66.9702652022502 estimated_peak_memory_range: - min: 2338816 - max: 42114832 + min: 2297856 + max: 36608352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jgn6o3rj5 + job_id: jped7qzo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14536.0 - throughput: 68.79471656576774 + inference_time: 14626.0 + throughput: 68.37139340899768 estimated_peak_memory_range: - min: 21012480 - max: 57330752 + min: 21004288 + max: 61190672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jp3jxe8mg + job_id: jgn6z4nk5 job_status: Passed torchscript_onnx: inference_time: 21845.0 throughput: 45.777065690089266 estimated_peak_memory_range: - min: 888832 - max: 136880176 + min: 614400 + max: 138032848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: j5mno347p + job_id: jgo2lxrqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:24:14Z' + timestamp: '2024-12-11T23:51:09Z' - torchscript_onnx_tflite: - inference_time: 14040.0 - throughput: 
71.22507122507122 + inference_time: 11876.0 + throughput: 84.20343550016841 estimated_peak_memory_range: - min: 724992 - max: 38305600 + min: 2203648 + max: 41334448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jprvoe1kg + job_id: jgz3l6mo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14258.0 - throughput: 70.13606396409034 + inference_time: 14245.0 + throughput: 70.2000702000702 estimated_peak_memory_range: min: 25178112 - max: 61592032 + max: 63145200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jgo2o3m1p + job_id: jprvlr00g job_status: Passed torchscript_onnx: - inference_time: 22078.0 - throughput: 45.29395778603134 + inference_time: 18728.0 + throughput: 53.39598462195643 estimated_peak_memory_range: - min: 30855168 - max: 86106480 + min: 28004352 + max: 83311104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jgn6o3xj5 + job_id: jpv6l8lk5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:24:15Z' + timestamp: '2024-12-11T23:51:10Z' - torchscript_onnx_tflite: - inference_time: 20007.0 - throughput: 49.982506122857004 + inference_time: 20015.0 + throughput: 49.96252810392206 estimated_peak_memory_range: - min: 2535424 - max: 28829344 + min: 5414912 + max: 32336176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jp2k4l36p + job_id: j5welk735 job_status: Passed torchscript_onnx_qnn: - inference_time: 19007.0 - throughput: 52.6121955069185 + inference_time: 19779.0 + throughput: 50.558673340411545 
estimated_peak_memory_range: - min: 25272320 - max: 26483040 + min: 25292800 + max: 26572032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jpv6ev4z5 + job_id: jp2kr1wrp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:24:02Z' + timestamp: '2024-12-11T23:50:59Z' - torchscript_onnx_tflite: - inference_time: 920830.0 - throughput: 1.0859767818164048 + inference_time: 921436.0 + throughput: 1.085262568425805 estimated_peak_memory_range: - min: 86016 - max: 37008768 + min: 172032 + max: 37080800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jpy1q6v0p + job_id: jg9lzrmwg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 920022.0 + throughput: 1.0869305299221106 + estimated_peak_memory_range: + min: 25182208 + max: 36551520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 175 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 175 + job_id: jp0zmwj95 job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:24:05Z' + timestamp: '2024-12-11T23:51:01Z' - torchscript_onnx_tflite: - inference_time: 19775.0 - throughput: 50.56890012642225 + inference_time: 20260.0 + throughput: 49.35834155972359 estimated_peak_memory_range: - min: 2510848 - max: 29461488 + min: 2551808 + max: 26197784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jp0zdlk05 + job_id: jp14n9j8p job_status: Passed torchscript_onnx_qnn: - inference_time: 19115.0 - throughput: 52.3149359142035 + inference_time: 19296.0 + throughput: 
51.824212271973465 estimated_peak_memory_range: - min: 26628096 - max: 27894424 + min: 25251840 + max: 26496520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j5we8qx45 + job_id: jp8qenxkp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:24:06Z' + timestamp: '2024-12-11T23:51:03Z' - torchscript_onnx_tflite: - inference_time: 36548.0 - throughput: 27.36127831892306 + inference_time: 36466.0 + throughput: 27.4228048044754 estimated_peak_memory_range: - min: 2531328 - max: 32946864 + min: 2568192 + max: 34343584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jp8q6z8qp + job_id: jgdxdk3rp job_status: Passed torchscript_onnx_qnn: - inference_time: 35494.0 - throughput: 28.17377584943934 + inference_time: 35389.0 + throughput: 28.25736810873435 estimated_peak_memory_range: - min: 25210880 - max: 31215680 + min: 25231360 + max: 31139456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jg9lkw8mg + job_id: jgke214wg job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:24:08Z' + timestamp: '2024-12-11T23:51:04Z' - torchscript_onnx_tflite: - inference_time: 20094.0 - throughput: 49.76609933313427 + inference_time: 20042.0 + throughput: 49.89522003792037 estimated_peak_memory_range: - min: 2535424 - max: 31351240 + min: 2506752 + max: 25836752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jgkeo3dvg + job_id: j57yem4v5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 19380.0 - throughput: 51.59958720330237 + inference_time: 19225.0 + throughput: 52.01560468140442 estimated_peak_memory_range: - min: 25280512 - max: 26518656 + min: 25251840 + max: 26464880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jp147e3np + job_id: j5q6lnynp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:24:09Z' + timestamp: '2024-12-11T23:51:05Z' - torchscript_onnx_tflite: - inference_time: 41163.0 - throughput: 24.293661783640648 + inference_time: 41274.0 + throughput: 24.228327760817947 estimated_peak_memory_range: - min: 933888 - max: 36462784 + min: 1716224 + max: 39761760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j5q6z3wep + job_id: jp4ly7185 job_status: Passed torchscript_onnx_qnn: - inference_time: 40517.0 - throughput: 24.680998099563148 + inference_time: 40547.0 + throughput: 24.66273707056009 estimated_peak_memory_range: - min: 25231360 - max: 30986416 + min: 25214976 + max: 31224224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j57ykx6n5 + job_id: jglvydxj5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:24:10Z' + timestamp: '2024-12-11T23:51:06Z' - torchscript_onnx_tflite: - inference_time: 32828.0 - throughput: 30.461800901669307 + inference_time: 32738.0 + throughput: 30.545543405217177 estimated_peak_memory_range: - min: 2535424 - max: 39344736 + min: 2301952 + max: 37898912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jglvo3725 + job_id: jpxklq435 job_status: Passed torchscript_onnx_qnn: - inference_time: 32701.0 - throughput: 30.580104583957677 + inference_time: 32612.0 + throughput: 30.663559425978168 estimated_peak_memory_range: - min: 25206784 - max: 58410992 + min: 25255936 + max: 59880640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jp4lmv825 + job_id: j56y8x76p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:24:11Z' + timestamp: '2024-12-11T23:51:07Z' - torchscript_onnx_qnn: - inference_time: 19504.0 - throughput: 51.271534044298605 + inference_time: 19265.0 + throughput: 51.90760446405398 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -470,11 +485,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jgjvoe11g + job_id: jpy1olx8p job_status: Passed torchscript_onnx: - inference_time: 33040.0 - throughput: 30.26634382566586 + inference_time: 33055.0 + throughput: 30.252609287551053 estimated_peak_memory_range: min: 25223168 max: 25223168 @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jprvoe9kg + job_id: jgjvr9rvg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:24:16Z' + timestamp: '2024-12-11T23:51:12Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/info.yaml b/qai_hub_models/models/ffnet_54s_quantized/info.yaml index f68103d3..9180d6c9 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Semantic Segmentation tags: - quantized - real-time 
+imsdk_supported: true research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index a9e30d34..74026b10 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: FFNet-54S-Quantized universal_assets: - torchscript_onnx_tflite: mn41d1vvn - torchscript_onnx: mqpzyzxgn + torchscript_onnx_tflite: mqyv3owvq + torchscript_onnx: mmr36l7wm performance_metrics: - torchscript_onnx_tflite: - inference_time: 4717.0 - throughput: 211.999152003392 + inference_time: 4773.0 + throughput: 209.51183741881417 estimated_peak_memory_range: - min: 942080 - max: 11893112 + min: 675840 + max: 10866272 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp147ed7p + job_id: jpy1ol78p job_status: Passed torchscript_onnx: - inference_time: 10461.0 - throughput: 95.59315553006405 + inference_time: 10501.0 + throughput: 95.229025807066 estimated_peak_memory_range: - min: 114688 - max: 16234416 + min: 61440 + max: 286700448 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: jp147eq7p + job_id: jp8qen1kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:23:21Z' + timestamp: '2024-12-11T23:50:18Z' - torchscript_onnx_tflite: - inference_time: 3407.0 - throughput: 293.51335485764605 + inference_time: 3399.0 + throughput: 294.20417769932334 estimated_peak_memory_range: - min: 462848 - max: 29932832 + min: 
450560 + max: 31875712 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +104,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jgdx8orzp + job_id: jp0zmwv95 job_status: Passed torchscript_onnx: - inference_time: 7378.0 - throughput: 135.53808620222281 + inference_time: 7326.0 + throughput: 136.5001365001365 estimated_peak_memory_range: - min: 7626752 - max: 138078784 + min: 7569408 + max: 137463616 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +119,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: jgdx8o7zp + job_id: jgke218wg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +128,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:23:22Z' + timestamp: '2024-12-11T23:50:19Z' - torchscript_onnx_tflite: - inference_time: 3370.0 - throughput: 296.7359050445104 + inference_time: 3372.0 + throughput: 296.55990510083035 estimated_peak_memory_range: - min: 12288 - max: 25798528 + min: 360448 + max: 28037392 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +142,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: j57ykxj95 + job_id: jp8qen4kp job_status: Passed torchscript_onnx: - inference_time: 6501.0 - throughput: 153.82248884786955 + inference_time: 7202.0 + throughput: 138.8503193557345 estimated_peak_memory_range: - min: 7573504 - max: 70974208 + min: 5931008 + max: 71505968 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +157,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - job_id: j5we8qm45 + job_id: j5q6lnvnp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +166,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:23:23Z' + timestamp: '2024-12-11T23:50:21Z' - torchscript_onnx_tflite: - inference_time: 31108.0 - throughput: 
32.14607175003215 + inference_time: 31131.0 + throughput: 32.12232180141981 estimated_peak_memory_range: - min: 12288 - max: 33812288 + min: 679936 + max: 39016528 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,7 +180,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp4lmvx15 + job_id: jgke219wg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -189,13 +189,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:22:57Z' + timestamp: '2024-12-11T23:49:54Z' - torchscript_onnx_tflite: - inference_time: 201539.0 - throughput: 4.961818804300904 + inference_time: 202547.0 + throughput: 4.937125704157554 estimated_peak_memory_range: - min: 1368064 - max: 8364728 + min: 516096 + max: 5503120 primary_compute_unit: NPU precision: int8 layer_info: @@ -203,7 +203,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jpxk3y7l5 + job_id: j5q6lnmnp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -212,13 +212,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:22:58Z' + timestamp: '2024-12-11T23:49:55Z' - torchscript_onnx_tflite: - inference_time: 4717.0 - throughput: 211.999152003392 + inference_time: 4708.0 + throughput: 212.40441801189465 estimated_peak_memory_range: - min: 659456 - max: 9666168 + min: 643072 + max: 9517952 primary_compute_unit: NPU precision: int8 layer_info: @@ -226,7 +226,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: j5mno3w9p + job_id: jglvyd1j5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -235,13 +235,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:22:59Z' + timestamp: '2024-12-11T23:49:57Z' - torchscript_onnx_tflite: - inference_time: 115592.0 - throughput: 8.651117724409994 + inference_time: 115554.0 + throughput: 8.653962649497204 
estimated_peak_memory_range: - min: 761856 - max: 25984688 + min: 729088 + max: 27152304 primary_compute_unit: NPU precision: int8 layer_info: @@ -249,7 +249,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jgn6o39q5 + job_id: j56y8xd6p job_status: Passed reference_device_info: name: SA7255P ADP @@ -258,13 +258,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:23:00Z' + timestamp: '2024-12-11T23:49:58Z' - torchscript_onnx_tflite: - inference_time: 4777.0 - throughput: 209.33640360058615 + inference_time: 4784.0 + throughput: 209.03010033444815 estimated_peak_memory_range: - min: 761856 - max: 9796616 + min: 643072 + max: 10323672 primary_compute_unit: NPU precision: int8 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jprvoe47g + job_id: jp3jzdw3g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:23:01Z' + timestamp: '2024-12-11T23:49:59Z' - torchscript_onnx_tflite: - inference_time: 9270.0 - throughput: 107.87486515641855 + inference_time: 9293.0 + throughput: 107.60787689658883 estimated_peak_memory_range: - min: 638976 - max: 24474752 + min: 667648 + max: 29846672 primary_compute_unit: NPU precision: int8 layer_info: @@ -295,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp2k4l7qp + job_id: jgo2lx4qp job_status: Passed reference_device_info: name: SA8295P ADP @@ -304,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:23:02Z' + timestamp: '2024-12-11T23:50:00Z' - torchscript_onnx_tflite: - inference_time: 4694.0 - throughput: 213.0379207498935 + inference_time: 4830.0 + throughput: 207.0393374741201 estimated_peak_memory_range: - min: 643072 - max: 11575312 + min: 749568 + max: 16441744 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -318,7 +318,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jpy1q64lp + job_id: jgjvr9qvg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -327,13 +327,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:23:03Z' + timestamp: '2024-12-11T23:50:01Z' - torchscript_onnx_tflite: - inference_time: 6495.0 - throughput: 153.96458814472672 + inference_time: 6535.0 + throughput: 153.0221882172915 estimated_peak_memory_range: - min: 655360 - max: 26177424 + min: 659456 + max: 29763120 primary_compute_unit: NPU precision: int8 layer_info: @@ -341,7 +341,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp0zdl1n5 + job_id: jped7qyo5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -350,13 +350,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:23:04Z' + timestamp: '2024-12-11T23:50:02Z' - torchscript_onnx_tflite: - inference_time: 5904.0 - throughput: 169.37669376693768 + inference_time: 5912.0 + throughput: 169.14749661705008 estimated_peak_memory_range: - min: 655360 - max: 33237360 + min: 32768 + max: 35565952 primary_compute_unit: NPU precision: int8 layer_info: @@ -364,7 +364,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 120 - job_id: jp8q6z3op + job_id: jgz3l6no5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -373,13 +373,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:23:05Z' + timestamp: '2024-12-11T23:50:04Z' - torchscript_onnx: - inference_time: 10684.0 - throughput: 93.59790340696368 + inference_time: 10628.0 + throughput: 94.0910801656003 estimated_peak_memory_range: - min: 13836288 - max: 13836288 + min: 14770176 + max: 14770176 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +387,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 217 - 
job_id: jp147eqnp + job_id: jglvydlj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -396,4 +396,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:23:24Z' + timestamp: '2024-12-11T23:50:22Z' diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 8ead1045..a09a68bd 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FFNet-78S universal_assets: - torchscript_onnx_tflite: mng1d1r0n - torchscript_onnx: mqy3w3vxm + torchscript_onnx_tflite: mq8dkrj3m + torchscript_onnx: mq36e59lq performance_metrics: - torchscript_onnx_tflite: - inference_time: 23720.0 - throughput: 42.158516020236085 + inference_time: 23401.0 + throughput: 42.73321652920816 estimated_peak_memory_range: - min: 2580480 - max: 31120064 + min: 2527232 + max: 31573896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpv6evwr5 + job_id: jgdxdk8rp job_status: Passed torchscript_onnx_qnn: - inference_time: 24001.0 - throughput: 41.664930627890506 + inference_time: 23624.0 + throughput: 42.329834067050456 estimated_peak_memory_range: - min: 25235456 - max: 54542064 + min: 25227264 + max: 49894160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jpxk3y1l5 + job_id: jp8qen6kp job_status: Passed torchscript_onnx: - inference_time: 37609.0 - throughput: 26.589380201547502 + inference_time: 37774.0 + throughput: 26.473235558850003 estimated_peak_memory_range: - min: 25350144 - max: 56417120 + min: 25280512 + max: 27841024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: j56yrn3yp + job_id: j5welk135 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:22:05Z' + timestamp: '2024-12-11T23:49:02Z' - torchscript_onnx_tflite: - inference_time: 17320.0 - throughput: 57.736720554272516 + inference_time: 17568.0 + throughput: 56.92167577413479 estimated_peak_memory_range: - min: 1454080 - max: 46551216 + min: 475136 + max: 46299040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgjvoeleg + job_id: j57yemkv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17928.0 - throughput: 55.77867023650156 + inference_time: 17359.0 + throughput: 57.607005011809434 estimated_peak_memory_range: - min: 20922368 - max: 61548944 + min: 21004288 + max: 60660768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j5mno3z9p + job_id: jgke21owg job_status: Passed torchscript_onnx: - inference_time: 26395.0 - throughput: 37.885963250615646 + inference_time: 26397.0 + throughput: 37.88309277569421 estimated_peak_memory_range: - min: 33619968 - max: 186198608 + min: 31318016 + max: 184200752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jp3jxe4ng + job_id: jg9lzrxwg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:22:06Z' + timestamp: '2024-12-11T23:49:03Z' - torchscript_onnx_tflite: - inference_time: 14248.0 - throughput: 70.18528916339136 + inference_time: 16936.0 + throughput: 59.045819555975434 estimated_peak_memory_range: - min: 1097728 - max: 44656336 + min: 479232 + max: 44768784 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jped8kvv5 + job_id: jp4ly7m85 job_status: Passed torchscript_onnx_qnn: - inference_time: 16848.0 - throughput: 59.35422602089269 + inference_time: 17026.0 + throughput: 58.73370139786209 estimated_peak_memory_range: - min: 20983808 - max: 62962144 + min: 25182208 + max: 66754368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgn6o3eq5 + job_id: j5q6lnznp job_status: Passed torchscript_onnx: - inference_time: 25427.0 - throughput: 39.328273095528374 + inference_time: 25401.0 + throughput: 39.36852879807881 estimated_peak_memory_range: - min: 29069312 - max: 90316672 + min: 32686080 + max: 94464576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jgo2o31kp + job_id: jp14n9v8p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:22:07Z' + timestamp: '2024-12-11T23:49:04Z' - torchscript_onnx_tflite: - inference_time: 23647.0 - throughput: 42.28866240960799 + inference_time: 24470.0 + throughput: 40.86636697997548 estimated_peak_memory_range: - min: 2555904 - max: 31297024 + min: 2572288 + max: 31024856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgz38r7x5 + job_id: jpxklq335 job_status: Passed torchscript_onnx_qnn: - inference_time: 23086.0 - throughput: 43.31629559040111 + inference_time: 23056.0 + throughput: 43.37265787647467 estimated_peak_memory_range: - min: 25260032 - max: 26374528 + min: 25227264 + max: 26894568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 235 - job_id: jprvoey7g + job_id: jglvydoj5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:21:56Z' + timestamp: '2024-12-11T23:48:52Z' - torchscript_onnx_tflite: - inference_time: 1164068.0 - throughput: 0.8590563437874763 + inference_time: 1163924.0 + throughput: 0.8591626257384503 estimated_peak_memory_range: - min: 114688 - max: 42261232 + min: 12288 + max: 40656656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5we8q9m5 + job_id: j5mn07odp job_status: Passed torchscript_onnx_qnn: - inference_time: 1162892.0 - throughput: 0.8599250833267406 + inference_time: 1162845.0 + throughput: 0.8599598398754779 estimated_peak_memory_range: - min: 25223168 - max: 31063632 + min: 25198592 + max: 35492176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jpy1q6dlp + job_id: jp3jzdx3g job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:21:58Z' + timestamp: '2024-12-11T23:48:54Z' - torchscript_onnx_tflite: - inference_time: 23214.0 - throughput: 43.07745326096321 + inference_time: 23756.0 + throughput: 42.09462872537464 estimated_peak_memory_range: - min: 2510848 - max: 33245712 + min: 2547712 + max: 30991864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jg9lkw48g + job_id: jgn6z4ok5 job_status: Passed torchscript_onnx_qnn: - inference_time: 24106.0 - throughput: 41.483448104206424 + inference_time: 23634.0 + throughput: 42.31192350004231 estimated_peak_memory_range: - min: 25292800 - max: 26606184 + 
min: 25268224 + max: 26520024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp0zdlrn5 + job_id: jgo2lxoqp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:21:59Z' + timestamp: '2024-12-11T23:48:56Z' - torchscript_onnx_tflite: - inference_time: 44303.0 - throughput: 22.571834864456132 + inference_time: 44105.0 + throughput: 22.673166307674865 estimated_peak_memory_range: - min: 2527232 - max: 35482960 + min: 2572288 + max: 37130496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp147e87p + job_id: jprvlro0g job_status: Passed torchscript_onnx_qnn: - inference_time: 43366.0 - throughput: 23.05953973158696 + inference_time: 43449.0 + throughput: 23.015489424382608 estimated_peak_memory_range: - min: 25182208 - max: 30934000 + min: 27987968 + max: 34093984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp8q6z7op + job_id: jpv6l89k5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:22:01Z' + timestamp: '2024-12-11T23:48:57Z' - torchscript_onnx_tflite: - inference_time: 23216.0 - throughput: 43.0737422467264 + inference_time: 23466.0 + throughput: 42.61484701269922 estimated_peak_memory_range: - min: 2322432 - max: 34077928 + min: 2543616 + max: 29814496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgdx8ovzp + job_id: jp2kr14rp job_status: Passed torchscript_onnx_qnn: - inference_time: 23393.0 - throughput: 42.74783054759971 + 
inference_time: 23294.0 + throughput: 42.92950974499871 estimated_peak_memory_range: - min: 25284608 - max: 26719168 + min: 25239552 + max: 26824128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jgkeo3yng + job_id: jgjvr9wvg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:22:02Z' + timestamp: '2024-12-11T23:48:58Z' - torchscript_onnx_tflite: - inference_time: 49885.0 - throughput: 20.046106043900974 + inference_time: 49839.0 + throughput: 20.064608037881978 estimated_peak_memory_range: - min: 2560000 - max: 44369504 + min: 0 + max: 41821952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j57ykxd95 + job_id: jpy1olq8p job_status: Passed torchscript_onnx_qnn: - inference_time: 49835.0 - throughput: 20.066218521119694 + inference_time: 49910.0 + throughput: 20.036064916850332 estimated_peak_memory_range: - min: 25223168 - max: 30775040 + min: 25235456 + max: 31331104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j5q6z32op + job_id: jped7qlo5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:22:03Z' + timestamp: '2024-12-11T23:48:59Z' - torchscript_onnx_tflite: - inference_time: 39548.0 - throughput: 25.285728734702133 + inference_time: 41391.0 + throughput: 24.159841511439684 estimated_peak_memory_range: - min: 2531328 - max: 42756624 + min: 2568192 + max: 39364960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp4lmvw15 + 
job_id: jp0zmwd95 job_status: Passed torchscript_onnx_qnn: - inference_time: 39617.0 - throughput: 25.241689173839514 + inference_time: 39764.0 + throughput: 25.148375414948195 estimated_peak_memory_range: - min: 25223168 - max: 60769024 + min: 14618624 + max: 54878976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jglvo30m5 + job_id: jgz3l64o5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:22:04Z' + timestamp: '2024-12-11T23:49:00Z' - torchscript_onnx_qnn: - inference_time: 23164.0 - throughput: 43.17043688482127 + inference_time: 23125.0 + throughput: 43.24324324324324 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jp2k4lmqp + job_id: j56y8xr6p job_status: Passed torchscript_onnx: - inference_time: 36863.0 - throughput: 27.12747199088517 + inference_time: 36969.0 + throughput: 27.04969028104628 estimated_peak_memory_range: - min: 32497664 - max: 32497664 + min: 33570816 + max: 33570816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jpv6ev1r5 + job_id: jgdxdkzrp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:22:08Z' + timestamp: '2024-12-11T23:49:05Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index 0b4b56fb..9635c52d 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: FFNet-78S-LowRes universal_assets: - 
torchscript_onnx_tflite: mqv4p44eq - torchscript_onnx: mqv4p40eq + torchscript_onnx_tflite: mqe7xerym + torchscript_onnx: mmd3ye2on performance_metrics: - torchscript_onnx_tflite: - inference_time: 8256.0 - throughput: 121.12403100775194 + inference_time: 8147.0 + throughput: 122.74456855284154 estimated_peak_memory_range: min: 32768 - max: 53059088 + max: 52294464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp8q6zmop + job_id: jg9lzr8lg job_status: Passed torchscript_onnx_qnn: - inference_time: 8341.0 - throughput: 119.88970147464333 + inference_time: 8369.0 + throughput: 119.4885888397658 estimated_peak_memory_range: - min: 6316032 - max: 31075536 + min: 6340608 + max: 31825672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jgz38rrx5 + job_id: jpy1oln7p job_status: Passed torchscript_onnx: - inference_time: 7912.0 - throughput: 126.39029322548028 + inference_time: 8265.0 + throughput: 120.99213551119178 estimated_peak_memory_range: - min: 0 - max: 53717200 + min: 6307840 + max: 735374688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jp2k4llqp + job_id: jped7q215 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:21:12Z' + timestamp: '2024-12-11T23:48:10Z' - torchscript_onnx_tflite: - inference_time: 5748.0 - throughput: 173.97355601948504 + inference_time: 5750.0 + throughput: 173.91304347826087 estimated_peak_memory_range: - min: 659456 - max: 27780448 + min: 503808 + max: 28954256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgkeo3qng + 
job_id: jp14n932p job_status: Passed torchscript_onnx_qnn: - inference_time: 5793.0 - throughput: 172.62213015708613 + inference_time: 5775.0 + throughput: 173.16017316017317 estimated_peak_memory_range: - min: 6258688 - max: 33102160 + min: 6307840 + max: 35276880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j5we8qqm5 + job_id: jp0zmwk65 job_status: Passed torchscript_onnx: - inference_time: 5817.0 - throughput: 171.909919202338 + inference_time: 5834.0 + throughput: 171.40898183064792 estimated_peak_memory_range: - min: 7598080 - max: 94955312 + min: 0 + max: 85286720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jpy1q66lp + job_id: jgz3l6wk5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:21:13Z' + timestamp: '2024-12-11T23:48:11Z' - torchscript_onnx_tflite: - inference_time: 5640.0 - throughput: 177.3049645390071 + inference_time: 5633.0 + throughput: 177.52529735487306 estimated_peak_memory_range: - min: 151552 - max: 23507136 + min: 102400 + max: 25071824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5q6z3rop + job_id: jgdxdk0ep job_status: Passed torchscript_onnx_qnn: - inference_time: 4777.0 - throughput: 209.33640360058615 + inference_time: 5670.0 + throughput: 176.3668430335097 estimated_peak_memory_range: min: 6303744 - max: 27048944 + max: 29314688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jg9lkww8g + job_id: jp8qen8xp job_status: Passed torchscript_onnx: - inference_time: 5824.0 - throughput: 171.7032967032967 + 
inference_time: 5771.0 + throughput: 173.28019407381737 estimated_peak_memory_range: - min: 1593344 - max: 48337232 + min: 7593984 + max: 55294928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jp0zdlln5 + job_id: j5welk865 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:21:15Z' + timestamp: '2024-12-11T23:48:12Z' - torchscript_onnx_tflite: - inference_time: 8172.0 - throughput: 122.36906510034264 + inference_time: 8156.0 + throughput: 122.60912211868563 estimated_peak_memory_range: - min: 16384 - max: 43896672 + min: 28672 + max: 42290568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jglvo33m5 + job_id: j57yem6l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7604.0 - throughput: 131.5097317201473 + inference_time: 7570.0 + throughput: 132.1003963011889 estimated_peak_memory_range: - min: 6365184 - max: 7608600 + min: 6340608 + max: 7554464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jp147ee7p + job_id: jgke21d2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:21:03Z' + timestamp: '2024-12-11T23:48:01Z' - torchscript_onnx_tflite: - inference_time: 289313.0 - throughput: 3.4564641063484878 + inference_time: 289319.0 + throughput: 3.4563924249703613 estimated_peak_memory_range: - min: 778240 - max: 22912368 + min: 430080 + max: 23404320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: 
j56yrnnyp + job_id: jp4ly78v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 288598.0 - throughput: 3.4650274776678978 + inference_time: 288641.0 + throughput: 3.464511278716468 estimated_peak_memory_range: - min: 4018176 - max: 9650704 + min: 4083712 + max: 14371680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j57ykxx95 + job_id: jglvyd785 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:21:05Z' + timestamp: '2024-12-11T23:48:03Z' - torchscript_onnx_tflite: - inference_time: 8357.0 - throughput: 119.66016513102788 + inference_time: 8260.0 + throughput: 121.06537530266344 estimated_peak_memory_range: - min: 643072 - max: 25126456 + min: 638976 + max: 24940840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp3jxeeng + job_id: jpxklqm15 job_status: Passed torchscript_onnx_qnn: - inference_time: 7706.0 - throughput: 129.76901116013497 + inference_time: 7698.0 + throughput: 129.90387113535982 estimated_peak_memory_range: - min: 6340608 - max: 7605064 + min: 8339456 + max: 9503808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jp4lmvv15 + job_id: j56y8xv0p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:21:07Z' + timestamp: '2024-12-11T23:48:04Z' - torchscript_onnx_tflite: - inference_time: 13989.0 - throughput: 71.4847380084352 + inference_time: 14002.0 + throughput: 71.4183688044565 estimated_peak_memory_range: - min: 638976 - max: 18311072 + min: 667648 + max: 20746848 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgo2o33kp + job_id: j5mn074wp job_status: Passed torchscript_onnx_qnn: - inference_time: 13538.0 - throughput: 73.86615452799528 + inference_time: 13677.0 + throughput: 73.11544929443592 estimated_peak_memory_range: - min: 53248 - max: 6093296 + min: 45056 + max: 6209248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jpxk3yyl5 + job_id: jp3jzd8lg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:21:08Z' + timestamp: '2024-12-11T23:48:05Z' - torchscript_onnx_tflite: - inference_time: 8152.0 - throughput: 122.6692836113837 + inference_time: 8262.0 + throughput: 121.03606874848705 estimated_peak_memory_range: - min: 24576 - max: 64662336 + min: 643072 + max: 29302728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpv6evvr5 + job_id: jgn6z4xr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7592.0 - throughput: 131.71759747102212 + inference_time: 7699.0 + throughput: 129.88699831146903 estimated_peak_memory_range: - min: 6397952 - max: 7775816 + min: 7843840 + max: 9511512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: j5mno339p + job_id: jgo2lxmxp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:21:09Z' + timestamp: '2024-12-11T23:48:06Z' - torchscript_onnx_tflite: - inference_time: 15754.0 - throughput: 63.475942617747876 + inference_time: 15778.0 + throughput: 63.37938902268982 
estimated_peak_memory_range: - min: 659456 - max: 22213952 + min: 724992 + max: 23953424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgjvoeeeg + job_id: jprvlr99g job_status: Passed torchscript_onnx_qnn: - inference_time: 15156.0 - throughput: 65.98046978094484 + inference_time: 15239.0 + throughput: 65.62110374696502 estimated_peak_memory_range: - min: 1454080 - max: 7217824 + min: 1642496 + max: 12689632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jgn6o33q5 + job_id: jpv6l84j5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:21:10Z' + timestamp: '2024-12-11T23:48:08Z' - torchscript_onnx_tflite: - inference_time: 12133.0 - throughput: 82.41984669908514 + inference_time: 12117.0 + throughput: 82.52867871585376 estimated_peak_memory_range: - min: 585728 - max: 26486288 + min: 122880 + max: 28060272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jped8kkv5 + job_id: jp2kr1j4p job_status: Passed torchscript_onnx_qnn: - inference_time: 12541.0 - throughput: 79.73845785822502 + inference_time: 12312.0 + throughput: 81.22157244964262 estimated_peak_memory_range: - min: 6258688 - max: 27023776 + min: 6311936 + max: 31199872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jprvoee7g + job_id: jgjvr91xg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:21:11Z' + timestamp: '2024-12-11T23:48:09Z' - torchscript_onnx_qnn: - 
inference_time: 8199.0 - throughput: 121.96609342602757 + inference_time: 8071.0 + throughput: 123.90038409119069 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jgdx8oozp + job_id: j5q6lnw4p job_status: Passed torchscript_onnx: - inference_time: 8874.0 - throughput: 112.6887536623845 + inference_time: 8814.0 + throughput: 113.45586566825504 estimated_peak_memory_range: - min: 49336320 - max: 49336320 + min: 49364992 + max: 49364992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jp8q6zzop + job_id: jg9lzrklg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:21:16Z' + timestamp: '2024-12-11T23:48:13Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/info.yaml b/qai_hub_models/models/ffnet_78s_quantized/info.yaml index 83519214..55ac5b86 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Semantic Segmentation tags: - quantized - real-time +imsdk_supported: true research_paper: https://arxiv.org/abs/2206.08236 research_paper_title: Simple and Efficient Architectures for Semantic Segmentation license: https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index 5ead4d5a..168df739 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: FFNet-78S-Quantized universal_assets: - torchscript_onnx_tflite: mnwe1e33n - torchscript_onnx: mq3e1evkm + torchscript_onnx_tflite: mn7xl223q + 
torchscript_onnx: mq9lp99lq performance_metrics: - torchscript_onnx_tflite: - inference_time: 5703.0 - throughput: 175.34630896019638 + inference_time: 5704.0 + throughput: 175.3155680224404 estimated_peak_memory_range: - min: 638976 - max: 15011568 + min: 659456 + max: 15482696 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp2k4l0rp + job_id: j56y8x30p job_status: Passed torchscript_onnx: - inference_time: 11485.0 - throughput: 87.070091423596 + inference_time: 11437.0 + throughput: 87.4355163067238 estimated_peak_memory_range: - min: 90112 - max: 24380520 + min: 45056 + max: 24613376 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +81,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jpxk3y6l5 + job_id: j56y8xq0p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +90,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:20:23Z' + timestamp: '2024-12-11T23:47:22Z' - torchscript_onnx_tflite: - inference_time: 4078.0 - throughput: 245.21824423737127 + inference_time: 4092.0 + throughput: 244.37927663734115 estimated_peak_memory_range: min: 638976 - max: 33541392 + max: 34856896 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +104,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jpy1q6r8p + job_id: jp3jzd4lg job_status: Passed torchscript_onnx: - inference_time: 7814.0 - throughput: 127.9754287176862 + inference_time: 7789.0 + throughput: 128.38618564642445 estimated_peak_memory_range: - min: 7626752 - max: 163944784 + min: 7589888 + max: 166955328 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +119,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: j5mno369p + job_id: jp3jzdqlg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +128,13 @@ 
models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:20:24Z' + timestamp: '2024-12-11T23:47:23Z' - torchscript_onnx_tflite: inference_time: 4008.0 throughput: 249.500998003992 estimated_peak_memory_range: - min: 28672 - max: 29486304 + min: 634880 + max: 31325264 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +142,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp0zdl395 + job_id: jgo2lx1xp job_status: Passed torchscript_onnx: - inference_time: 6816.0 - throughput: 146.71361502347418 + inference_time: 7802.0 + throughput: 128.1722635221738 estimated_peak_memory_range: - min: 13561856 - max: 87903664 + min: 2084864 + max: 79411136 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +157,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jgn6o3mq5 + job_id: jgo2lxexp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +166,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:20:25Z' + timestamp: '2024-12-11T23:47:24Z' - torchscript_onnx_tflite: - inference_time: 35719.0 - throughput: 27.99630448780761 + inference_time: 36948.0 + throughput: 27.065064414853307 estimated_peak_memory_range: min: 684032 - max: 38224720 + max: 40262288 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,7 +180,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp8q6z0kp + job_id: jpv6l81j5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -189,13 +189,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:20:01Z' + timestamp: '2024-12-11T23:46:58Z' - torchscript_onnx_tflite: - inference_time: 215731.0 - throughput: 4.635402422461306 + inference_time: 218142.0 + throughput: 4.584169944348177 estimated_peak_memory_range: - min: 675840 - max: 2679784 + min: 692224 + max: 
3147816 primary_compute_unit: NPU precision: int8 layer_info: @@ -203,7 +203,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgkeo37wg + job_id: jgjvr90xg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -212,13 +212,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:20:02Z' + timestamp: '2024-12-11T23:46:59Z' - torchscript_onnx_tflite: - inference_time: 5672.0 - throughput: 176.30465444287728 + inference_time: 5678.0 + throughput: 176.11835153222967 estimated_peak_memory_range: - min: 655360 - max: 13641344 + min: 638976 + max: 14534400 primary_compute_unit: NPU precision: int8 layer_info: @@ -226,7 +226,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: j5q6z3enp + job_id: jped7qr15 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -235,13 +235,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:20:03Z' + timestamp: '2024-12-11T23:47:00Z' - torchscript_onnx_tflite: - inference_time: 145368.0 - throughput: 6.879093060370921 + inference_time: 145456.0 + throughput: 6.874931250687493 estimated_peak_memory_range: - min: 733184 - max: 29772912 + min: 311296 + max: 29886944 primary_compute_unit: NPU precision: int8 layer_info: @@ -249,7 +249,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jglvo36j5 + job_id: jgz3l6xk5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -258,13 +258,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:20:04Z' + timestamp: '2024-12-11T23:47:02Z' - torchscript_onnx_tflite: - inference_time: 5731.0 - throughput: 174.48961786773688 + inference_time: 5824.0 + throughput: 171.7032967032967 estimated_peak_memory_range: - min: 638976 - max: 14776536 + min: 765952 + max: 13846936 primary_compute_unit: NPU precision: int8 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 156 - job_id: j56yrne6p + job_id: j5welkm65 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:20:05Z' + timestamp: '2024-12-11T23:47:03Z' - torchscript_onnx_tflite: - inference_time: 11233.0 - throughput: 89.02341315766047 + inference_time: 11234.0 + throughput: 89.01548869503294 estimated_peak_memory_range: - min: 667648 - max: 28473040 + min: 675840 + max: 32868192 primary_compute_unit: NPU precision: int8 layer_info: @@ -295,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jp3jxev3g + job_id: jg9lzr9lg job_status: Passed reference_device_info: name: SA8295P ADP @@ -304,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:20:06Z' + timestamp: '2024-12-11T23:47:04Z' - torchscript_onnx_tflite: - inference_time: 5854.0 - throughput: 170.8233686368295 + inference_time: 5696.0 + throughput: 175.56179775280899 estimated_peak_memory_range: - min: 667648 - max: 14694296 + min: 638976 + max: 13544088 primary_compute_unit: NPU precision: int8 layer_info: @@ -318,7 +318,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgo2o3kqp + job_id: jp14n9q2p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -327,13 +327,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:20:07Z' + timestamp: '2024-12-11T23:47:05Z' - torchscript_onnx_tflite: - inference_time: 7642.0 - throughput: 130.8557969118032 + inference_time: 7605.0 + throughput: 131.49243918474687 estimated_peak_memory_range: - min: 421888 - max: 29138832 + min: 655360 + max: 33208992 primary_compute_unit: NPU precision: int8 layer_info: @@ -341,7 +341,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jpv6evrk5 + job_id: jgdxdk7ep job_status: Passed 
reference_device_info: name: SA8775P ADP @@ -350,13 +350,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:20:08Z' + timestamp: '2024-12-11T23:47:06Z' - torchscript_onnx_tflite: - inference_time: 7139.0 - throughput: 140.07564084605687 + inference_time: 7129.0 + throughput: 140.27212792818068 estimated_peak_memory_range: - min: 667648 - max: 38806384 + min: 733184 + max: 36569424 primary_compute_unit: NPU precision: int8 layer_info: @@ -364,7 +364,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 156 - job_id: jgjvoe2vg + job_id: j57yemvl5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -373,13 +373,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:20:09Z' + timestamp: '2024-12-11T23:47:07Z' - torchscript_onnx: - inference_time: 11704.0 - throughput: 85.44087491455913 + inference_time: 11780.0 + throughput: 84.88964346349745 estimated_peak_memory_range: - min: 23359488 - max: 23359488 + min: 24608768 + max: 24608768 primary_compute_unit: NPU precision: int8 layer_info: @@ -387,7 +387,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 301 - job_id: jprvoe27g + job_id: jpv6l8zj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -396,4 +396,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:20:27Z' + timestamp: '2024-12-11T23:47:25Z' diff --git a/qai_hub_models/models/foot_track_net/perf.yaml b/qai_hub_models/models/foot_track_net/perf.yaml index ed5c0d6a..30fa04fd 100644 --- a/qai_hub_models/models/foot_track_net/perf.yaml +++ b/qai_hub_models/models/foot_track_net/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Person-Foot-Detection universal_assets: - torchscript_onnx_tflite: mmr62dg2m - torchscript_onnx: mq3e1v3km + torchscript_onnx_tflite: mmxe7l92n + torchscript_onnx: mm5edy7km performance_metrics: - torchscript_onnx_tflite: - 
inference_time: 4804.0 - throughput: 208.15986677768527 + inference_time: 4762.0 + throughput: 209.99580008399832 estimated_peak_memory_range: - min: 5103616 - max: 13755976 + min: 5095424 + max: 14940680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j5we8qw35 + job_id: jp2kr1m4p job_status: Passed torchscript_onnx_qnn: - inference_time: 5050.0 - throughput: 198.01980198019803 + inference_time: 5064.0 + throughput: 197.47235387045814 estimated_peak_memory_range: - min: 3706880 - max: 9152768 + min: 3731456 + max: 11220384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp2k4lxrp + job_id: jpv6l8wj5 job_status: Passed torchscript_onnx: - inference_time: 5120.0 - throughput: 195.3125 + inference_time: 5061.0 + throughput: 197.58940920766648 estimated_peak_memory_range: - min: 16568320 - max: 20308320 + min: 14704640 + max: 18920064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: jgjvoezvg + job_id: j5mn07wwp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:18:55Z' + timestamp: '2024-12-11T23:45:52Z' - torchscript_onnx_tflite: - inference_time: 3333.0 - throughput: 300.03000300030004 + inference_time: 3328.0 + throughput: 300.4807692307692 estimated_peak_memory_range: min: 12288 - max: 20782480 + max: 24626912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jg9lkw0wg + job_id: jpy1old7p job_status: Passed torchscript_onnx_qnn: - inference_time: 3511.0 - throughput: 284.8191398461977 + inference_time: 3518.0 + throughput: 284.2524161455372 
estimated_peak_memory_range: - min: 0 - max: 22016912 + min: 3702784 + max: 30795648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jpy1q6z8p + job_id: jgjvr9lxg job_status: Passed torchscript_onnx: - inference_time: 3621.0 - throughput: 276.16680475006905 + inference_time: 3614.0 + throughput: 276.70171555063644 estimated_peak_memory_range: - min: 2289664 - max: 74443424 + min: 0 + max: 75659008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: jped8keo5 + job_id: jgn6z49r5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:18:56Z' + timestamp: '2024-12-11T23:45:53Z' - torchscript_onnx_tflite: - inference_time: 3407.0 - throughput: 293.51335485764605 + inference_time: 2996.0 + throughput: 333.7783711615487 estimated_peak_memory_range: - min: 12288 - max: 18722720 + min: 8192 + max: 18315024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jp147e28p + job_id: jp0zmwr65 job_status: Passed torchscript_onnx_qnn: - inference_time: 3585.0 - throughput: 278.9400278940028 + inference_time: 3608.0 + throughput: 277.1618625277162 estimated_peak_memory_range: - min: 0 - max: 18416992 + min: 3698688 + max: 25232704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp0zdl495 + job_id: jped7qv15 job_status: Passed torchscript_onnx: inference_time: 3539.0 throughput: 282.56569652444193 estimated_peak_memory_range: - min: 0 - max: 38125696 + min: 10063872 + max: 48430912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: jgz38roo5 + job_id: jprvlr49g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:18:57Z' + timestamp: '2024-12-11T23:45:54Z' - torchscript_onnx_tflite: - inference_time: 4772.0 - throughput: 209.55574182732607 + inference_time: 4827.0 + throughput: 207.16801325875284 estimated_peak_memory_range: - min: 5124096 - max: 11580480 + min: 5087232 + max: 14467584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jgdx8onrp + job_id: jp8qen7xp job_status: Passed torchscript_onnx_qnn: - inference_time: 4873.0 - throughput: 205.21239482864766 + inference_time: 4882.0 + throughput: 204.83408439164276 estimated_peak_memory_range: - min: 3731456 - max: 5027280 + min: 3715072 + max: 9819760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp8q6z2kp + job_id: jgz3l67k5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:18:46Z' + timestamp: '2024-12-11T23:45:44Z' - torchscript_onnx_tflite: - inference_time: 156750.0 - throughput: 6.379585326953748 + inference_time: 156775.0 + throughput: 6.3785680114814225 estimated_peak_memory_range: - min: 4784128 - max: 22250640 + min: 5177344 + max: 23497168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j57ykx2v5 + job_id: jgke21y2g job_status: Passed torchscript_onnx_qnn: - inference_time: 156903.0 - throughput: 6.373364435351778 + inference_time: 156762.0 + throughput: 6.379096975032215 estimated_peak_memory_range: - 
min: 3665920 - max: 9548608 + min: 2965504 + max: 13372224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j5q6z30np + job_id: jg9lzr3lg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:18:49Z' + timestamp: '2024-12-11T23:45:46Z' - torchscript_onnx_tflite: - inference_time: 4870.0 - throughput: 205.3388090349076 + inference_time: 4792.0 + throughput: 208.6811352253756 estimated_peak_memory_range: - min: 5120000 - max: 13622264 + min: 5087232 + max: 14847344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jp4lmvn85 + job_id: j5q6ln24p job_status: Passed torchscript_onnx_qnn: - inference_time: 4867.0 - throughput: 205.4653790836244 + inference_time: 4953.0 + throughput: 201.8978396931153 estimated_peak_memory_range: - min: 3727360 - max: 5894304 + min: 3735552 + max: 6003920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jglvo34j5 + job_id: jp14n9d2p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:18:50Z' + timestamp: '2024-12-11T23:45:47Z' - torchscript_onnx_tflite: - inference_time: 8296.0 - throughput: 120.54001928640308 + inference_time: 8309.0 + throughput: 120.35142616440005 estimated_peak_memory_range: min: 5083136 - max: 21052896 + max: 21181984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jpxk3y935 + job_id: jglvydk85 job_status: Passed torchscript_onnx_qnn: - inference_time: 8873.0 - throughput: 112.70145384875465 + 
inference_time: 8245.0 + throughput: 121.2856276531231 estimated_peak_memory_range: min: 57344 - max: 6104704 + max: 6006928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j56yrn26p + job_id: jgdxdkrep job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:18:51Z' + timestamp: '2024-12-11T23:45:48Z' - torchscript_onnx_tflite: - inference_time: 4870.0 - throughput: 205.3388090349076 + inference_time: 4898.0 + throughput: 204.1649652919559 estimated_peak_memory_range: - min: 5091328 - max: 14719400 + min: 5083136 + max: 14823800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j5mno3edp + job_id: j56y8x10p job_status: Passed torchscript_onnx_qnn: - inference_time: 4939.0 - throughput: 202.47013565499088 + inference_time: 4976.0 + throughput: 200.96463022508038 estimated_peak_memory_range: - min: 4640768 - max: 5977128 + min: 3780608 + max: 4980696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp3jxen3g + job_id: j57yemjl5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:18:52Z' + timestamp: '2024-12-11T23:45:49Z' - torchscript_onnx_tflite: - inference_time: 9240.0 - throughput: 108.22510822510823 + inference_time: 9226.0 + throughput: 108.38933448948623 estimated_peak_memory_range: min: 5103616 - max: 23357584 + max: 26171680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jgn6o30k5 + job_id: jp3jzdmlg job_status: Passed 
torchscript_onnx_qnn: - inference_time: 9613.0 - throughput: 104.0257983980027 + inference_time: 9590.0 + throughput: 104.27528675703859 estimated_peak_memory_range: - min: 2785280 - max: 8552416 + min: 2064384 + max: 8102672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgo2o3zqp + job_id: jp4ly7xv5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:18:53Z' + timestamp: '2024-12-11T23:45:50Z' - torchscript_onnx_tflite: - inference_time: 7029.0 - throughput: 142.2677479015507 + inference_time: 7017.0 + throughput: 142.51104460595695 estimated_peak_memory_range: min: 5083136 - max: 28582512 + max: 32564816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jprvoe60g + job_id: jgo2lxvxp job_status: Passed torchscript_onnx_qnn: - inference_time: 7509.0 - throughput: 133.17352510320947 + inference_time: 7415.0 + throughput: 134.86176668914362 estimated_peak_memory_range: min: 3702784 - max: 26037152 + max: 29838288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jpv6ev0k5 + job_id: jpxklq715 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:18:54Z' + timestamp: '2024-12-11T23:45:51Z' - torchscript_onnx_qnn: - inference_time: 5460.0 - throughput: 183.15018315018315 + inference_time: 5370.0 + throughput: 186.21973929236498 estimated_peak_memory_range: min: 3690496 max: 3690496 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgkeo3vwg + job_id: j5welkd65 job_status: Passed torchscript_onnx: 
- inference_time: 5765.0 - throughput: 173.46053772766695 + inference_time: 5493.0 + throughput: 182.04988166757693 estimated_peak_memory_range: - min: 17428480 - max: 17428480 + min: 17518592 + max: 17518592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 202 - job_id: j5we8q235 + job_id: jp2kr174p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:18:58Z' + timestamp: '2024-12-11T23:45:55Z' diff --git a/qai_hub_models/models/foot_track_net_quantized/perf.yaml b/qai_hub_models/models/foot_track_net_quantized/perf.yaml index 71922dfb..03919296 100644 --- a/qai_hub_models/models/foot_track_net_quantized/perf.yaml +++ b/qai_hub_models/models/foot_track_net_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: Person-Foot-Detection-Quantized universal_assets: - torchscript_onnx_tflite: mq9pgxv2n - torchscript_onnx: mqkkxljzq + torchscript_onnx_tflite: mngg1z55n + torchscript_onnx: mn4l10y7q performance_metrics: - torchscript_onnx_tflite: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 1144.0 + throughput: 874.1258741258741 estimated_peak_memory_range: min: 12288 - max: 9716312 + max: 5422144 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp8q6zykp + job_id: jpxklqy95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1291.0 - throughput: 774.5933384972889 + inference_time: 1288.0 + throughput: 776.3975155279503 estimated_peak_memory_range: - min: 20480 - max: 69628888 + min: 12288 + max: 5095608 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp147e48p + job_id: jp3jzdezg job_status: Passed torchscript_onnx: 
- inference_time: 1665.0 - throughput: 600.6006006006006 + inference_time: 1661.0 + throughput: 602.0469596628537 estimated_peak_memory_range: - min: 12288 - max: 3927168 + min: 16384 + max: 4343776 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jglvo3vj5 + job_id: jp14n982p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:18:09Z' + timestamp: '2024-12-11T23:45:06Z' - torchscript_onnx_tflite: inference_time: 797.0 throughput: 1254.7051442910915 estimated_peak_memory_range: min: 12288 - max: 22853584 + max: 30279344 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgkeo3xwg + job_id: j5mn073qp job_status: Passed torchscript_onnx_qnn: - inference_time: 870.0 - throughput: 1149.4252873563219 + inference_time: 867.0 + throughput: 1153.4025374855826 estimated_peak_memory_range: min: 937984 - max: 27011104 + max: 25135536 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgdx8oxrp + job_id: jgo2lx3dp job_status: Passed torchscript_onnx: - inference_time: 1117.0 - throughput: 895.2551477170994 + inference_time: 1167.0 + throughput: 856.898029134533 estimated_peak_memory_range: - min: 61440 - max: 60209008 + min: 94208 + max: 60563776 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: j56yrny6p + job_id: jgdxdkvep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:18:10Z' + timestamp: '2024-12-11T23:45:08Z' - 
torchscript_onnx_tflite: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 774.0 + throughput: 1291.9896640826873 estimated_peak_memory_range: min: 8192 - max: 21290224 + max: 21787696 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5q6z3qnp + job_id: jgn6z43m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 901.0 - throughput: 1109.8779134295228 + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: min: 0 - max: 21605760 + max: 22871664 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp4lmvl85 + job_id: jpv6l8vm5 job_status: Passed torchscript_onnx: - inference_time: 1086.0 - throughput: 920.8103130755064 + inference_time: 1126.0 + throughput: 888.0994671403197 estimated_peak_memory_range: min: 0 - max: 41941616 + max: 42236768 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jp3jxej3g + job_id: j57yemdl5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:18:11Z' + timestamp: '2024-12-11T23:45:09Z' - torchscript_onnx_tflite: - inference_time: 5568.0 - throughput: 179.5977011494253 + inference_time: 5661.0 + throughput: 176.64723547076488 estimated_peak_memory_range: min: 1277952 - max: 27300112 + max: 31017648 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jglvo3mj5 + job_id: jprvlreeg job_status: Passed torchscript_onnx_qnn: - inference_time: 6908.0 - throughput: 144.75969889982628 + inference_time: 6809.0 + throughput: 146.864444118079 estimated_peak_memory_range: - 
min: 958464 - max: 9012416 + min: 937984 + max: 7585360 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpxk3yk35 + job_id: jgjvr9e8g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:17:58Z' + timestamp: '2024-12-11T23:44:56Z' - torchscript_onnx_tflite: - inference_time: 27211.0 - throughput: 36.74984381316379 + inference_time: 26300.0 + throughput: 38.02281368821293 estimated_peak_memory_range: - min: 1314816 - max: 7254120 + min: 1335296 + max: 11992176 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j56yrn46p + job_id: jp2kr1lmp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:17:46Z' + timestamp: '2024-12-11T23:44:44Z' - torchscript_onnx_tflite: - inference_time: 1189.0 - throughput: 841.0428931875525 + inference_time: 1145.0 + throughput: 873.3624454148471 estimated_peak_memory_range: min: 12288 - max: 7630176 + max: 5789472 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp3jxe03g + job_id: jpy1ol64p job_status: Passed torchscript_onnx_qnn: - inference_time: 1229.0 - throughput: 813.6696501220505 + inference_time: 1200.0 + throughput: 833.3333333333334 estimated_peak_memory_range: - min: 946176 - max: 2380736 + min: 958464 + max: 2222808 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: j5mno3ndp + job_id: jped7qk05 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:17:59Z' + timestamp: '2024-12-11T23:44:57Z' - torchscript_onnx_tflite: - inference_time: 19540.0 - throughput: 51.17707267144319 + inference_time: 19550.0 + throughput: 51.150895140664964 estimated_peak_memory_range: - min: 1298432 - max: 23635008 + min: 1331200 + max: 26885984 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgo2o36qp + job_id: jp0zmwle5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19788.0 - throughput: 50.53567818880129 + inference_time: 19789.0 + throughput: 50.53312446308555 estimated_peak_memory_range: min: 937984 - max: 6754048 + max: 11628144 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jprvoev0g + job_id: j5welkqj5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:18:02Z' + timestamp: '2024-12-11T23:44:59Z' - torchscript_onnx_tflite: - inference_time: 1150.0 - throughput: 869.5652173913044 + inference_time: 1149.0 + throughput: 870.3220191470845 estimated_peak_memory_range: min: 12288 - max: 9431768 + max: 106424200 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpv6ev6k5 + job_id: jp8qenz8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1233.0 - throughput: 811.0300081103001 + inference_time: 1247.0 + throughput: 801.924619085806 estimated_peak_memory_range: - min: 942080 - max: 3147976 + min: 729088 + max: 3409256 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp2k4lkrp + job_id: jg9lzrwvg job_status: Passed 
reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:18:03Z' + timestamp: '2024-12-11T23:45:01Z' - torchscript_onnx_tflite: - inference_time: 2230.0 - throughput: 448.4304932735426 + inference_time: 2250.0 + throughput: 444.44444444444446 estimated_peak_memory_range: min: 12288 - max: 21070096 + max: 20982656 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgjvoevvg + job_id: jgke213og job_status: Passed torchscript_onnx_qnn: - inference_time: 2363.0 - throughput: 423.1908590774439 + inference_time: 2320.0 + throughput: 431.0344827586207 estimated_peak_memory_range: - min: 0 - max: 6000656 + min: 970752 + max: 6847744 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jpy1q618p + job_id: jp14n9elp job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:18:04Z' + timestamp: '2024-12-11T23:45:02Z' - torchscript_onnx_tflite: - inference_time: 1188.0 - throughput: 841.7508417508418 + inference_time: 1155.0 + throughput: 865.8008658008658 estimated_peak_memory_range: min: 12288 - max: 9485632 + max: 73807712 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jped8kdo5 + job_id: j5q6ln3mp job_status: Passed torchscript_onnx_qnn: - inference_time: 1235.0 - throughput: 809.7165991902834 + inference_time: 1237.0 + throughput: 808.4074373484236 estimated_peak_memory_range: - min: 958464 - max: 2192408 + min: 32768 + max: 1246944 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - 
job_id: jp0zdlz95 + job_id: jgdxdkolp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:18:05Z' + timestamp: '2024-12-11T23:45:03Z' - torchscript_onnx_tflite: - inference_time: 1859.0 - throughput: 537.9236148466917 + inference_time: 1884.0 + throughput: 530.7855626326964 estimated_peak_memory_range: min: 12288 - max: 21285952 + max: 26325792 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgz38r3o5 + job_id: jglvyd3l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2092.0 - throughput: 478.0114722753346 + inference_time: 2075.0 + throughput: 481.9277108433735 estimated_peak_memory_range: - min: 0 - max: 5744256 + min: 856064 + max: 6849104 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jp8q6zqkp + job_id: j5welk965 job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:18:06Z' + timestamp: '2024-12-11T23:45:04Z' - torchscript_onnx_tflite: - inference_time: 1402.0 - throughput: 713.2667617689016 + inference_time: 1418.0 + throughput: 705.2186177715091 estimated_peak_memory_range: min: 12288 - max: 25202336 + max: 27160416 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5we8qe35 + job_id: j56y8xn7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1508.0 - throughput: 663.1299734748011 + inference_time: 1553.0 + throughput: 643.915003219575 estimated_peak_memory_range: min: 937984 - max: 28146368 + max: 29447824 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 198 - job_id: jgkeo3ewg + job_id: jg9lzr4lg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,10 +538,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:18:08Z' + timestamp: '2024-12-11T23:45:05Z' - torchscript_onnx_qnn: - inference_time: 2026.0 - throughput: 493.58341559723596 + inference_time: 1452.0 + throughput: 688.7052341597796 estimated_peak_memory_range: min: 925696 max: 925696 @@ -552,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 198 - job_id: jgn6o36k5 + job_id: jgz3l6r65 job_status: Passed torchscript_onnx: - inference_time: 1790.0 - throughput: 558.659217877095 + inference_time: 1776.0 + throughput: 563.063063063063 estimated_peak_memory_range: - min: 8605696 - max: 8605696 + min: 8695808 + max: 8695808 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jgo2o32qp + job_id: jp4ly7wv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:18:12Z' + timestamp: '2024-12-11T23:45:10Z' diff --git a/qai_hub_models/models/gear_guard_net/info.yaml b/qai_hub_models/models/gear_guard_net/info.yaml index bb8bfe04..2aed2ac8 100644 --- a/qai_hub_models/models/gear_guard_net/info.yaml +++ b/qai_hub_models/models/gear_guard_net/info.yaml @@ -31,3 +31,4 @@ has_animated_banner: true license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: [] +labels_file: ppe_labels.txt diff --git a/qai_hub_models/models/gear_guard_net/perf.yaml b/qai_hub_models/models/gear_guard_net/perf.yaml index 8b7a6d44..b06281f4 100644 --- a/qai_hub_models/models/gear_guard_net/perf.yaml +++ b/qai_hub_models/models/gear_guard_net/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: PPE-Detection universal_assets: - 
torchscript_onnx_tflite: mqv4p0kxq - torchscript_onnx: mq3e1v76m + torchscript_onnx_tflite: mn1wzd5zm + torchscript_onnx: mq8dkr3vm performance_metrics: - torchscript_onnx_tflite: - inference_time: 682.0 - throughput: 1466.275659824047 + inference_time: 677.0 + throughput: 1477.1048744460857 estimated_peak_memory_range: - min: 122880 - max: 82776592 + min: 20480 + max: 83299520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jgn6om8r5 + job_id: j5welk2j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 751.0 - throughput: 1331.5579227696405 + inference_time: 756.0 + throughput: 1322.7513227513227 estimated_peak_memory_range: - min: 753664 - max: 62546208 + min: 16384 + max: 62416712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp3jx12lg + job_id: jp2kr19mp job_status: Passed torchscript_onnx: - inference_time: 1053.0 - throughput: 949.667616334283 + inference_time: 1105.0 + throughput: 904.9773755656108 estimated_peak_memory_range: - min: 12288 - max: 15534832 + min: 651264 + max: 2322824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jp147ez8p + job_id: jgjvr928g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:17:08Z' + timestamp: '2024-12-11T23:44:08Z' - torchscript_onnx_tflite: inference_time: 494.0 throughput: 2024.2914979757086 estimated_peak_memory_range: - min: 16384 - max: 15543296 + min: 12288 + max: 21819632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jprvo2j9g + job_id: jg9lzrjvg job_status: Passed torchscript_onnx_qnn: 
inference_time: 544.0 throughput: 1838.235294117647 estimated_peak_memory_range: - min: 0 - max: 17879696 + min: 757760 + max: 21305792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgo2onqxp + job_id: jpy1olj4p job_status: Passed torchscript_onnx: - inference_time: 848.0 - throughput: 1179.245283018868 + inference_time: 861.0 + throughput: 1161.4401858304298 estimated_peak_memory_range: min: 0 - max: 48346688 + max: 50149440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jgdx8o1rp + job_id: jped7qw05 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:17:09Z' + timestamp: '2024-12-11T23:44:09Z' - torchscript_onnx_tflite: inference_time: 412.0 throughput: 2427.1844660194174 estimated_peak_memory_range: - min: 114688 - max: 14738320 + min: 12288 + max: 17980144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jp2k49n4p + job_id: jp14n9ylp job_status: Passed torchscript_onnx_qnn: - inference_time: 530.0 - throughput: 1886.7924528301887 + inference_time: 528.0 + throughput: 1893.939393939394 estimated_peak_memory_range: - min: 0 - max: 13954336 + min: 753664 + max: 14556352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpv6erxj5 + job_id: jp0zmw2e5 job_status: Passed torchscript_onnx: - inference_time: 849.0 - throughput: 1177.8563015312131 + inference_time: 848.0 + throughput: 1179.245283018868 estimated_peak_memory_range: min: 0 - max: 24882752 + max: 24643120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j57ykxrv5 + job_id: jgz3l6j65 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:17:10Z' + timestamp: '2024-12-11T23:44:10Z' - torchscript_onnx_tflite: - inference_time: 683.0 - throughput: 1464.1288433382138 + inference_time: 678.0 + throughput: 1474.9262536873157 estimated_peak_memory_range: - min: 16384 - max: 73016288 + min: 24576 + max: 83329352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jpy1qj07p + job_id: jgdxdkelp job_status: Passed torchscript_onnx_qnn: inference_time: 728.0 throughput: 1373.6263736263736 estimated_peak_memory_range: - min: 774144 - max: 2479776 + min: 765952 + max: 1908288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgjvo24xg + job_id: jp8qenm8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:16:59Z' + timestamp: '2024-12-11T23:43:58Z' - torchscript_onnx_tflite: - inference_time: 29048.0 - throughput: 34.42577802258331 + inference_time: 29043.0 + throughput: 34.43170471370038 estimated_peak_memory_range: - min: 155648 - max: 14035904 + min: 131072 + max: 15660240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jp0zd2765 + job_id: j57yemlr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 29306.0 - throughput: 34.122705248072066 + inference_time: 29314.0 + throughput: 34.11339291805963 estimated_peak_memory_range: - min: 737280 - max: 6516112 + min: 753664 + max: 11296832 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgz38jkk5 + job_id: j5q6lnrmp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:17:01Z' + timestamp: '2024-12-11T23:44:01Z' - torchscript_onnx_tflite: - inference_time: 684.0 - throughput: 1461.9883040935672 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: - min: 135168 - max: 83741568 + min: 126976 + max: 83570592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jp8q6mvxp + job_id: jp4ly7dl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 738.0 - throughput: 1355.0135501355014 + inference_time: 737.0 + throughput: 1356.85210312076 estimated_peak_memory_range: min: 770048 - max: 2386880 + max: 2056272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5we8q665 + job_id: jglvyd2l5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:17:02Z' + timestamp: '2024-12-11T23:44:02Z' - torchscript_onnx_tflite: - inference_time: 1822.0 - throughput: 548.847420417124 + inference_time: 1829.0 + throughput: 546.7468562055768 estimated_peak_memory_range: min: 16384 - max: 10437424 + max: 10497184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jgkeoqm2g + job_id: jpxklq695 job_status: Passed torchscript_onnx_qnn: - inference_time: 1991.0 - throughput: 502.26017076845807 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 753664 - max: 6787824 
+ min: 0 + max: 5912736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jg9lkwnlg + job_id: j56y8xz7p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:17:03Z' + timestamp: '2024-12-11T23:44:03Z' - torchscript_onnx_tflite: - inference_time: 681.0 - throughput: 1468.4287812041116 + inference_time: 678.0 + throughput: 1474.9262536873157 estimated_peak_memory_range: - min: 16384 - max: 6479344 + min: 28672 + max: 83560368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j5q6zro4p + job_id: j5mn076qp job_status: Passed torchscript_onnx_qnn: - inference_time: 743.0 - throughput: 1345.8950201884254 + inference_time: 745.0 + throughput: 1342.2818791946308 estimated_peak_memory_range: - min: 761856 - max: 2181064 + min: 786432 + max: 2014336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp147ez2p + job_id: jp3jzd1zg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:17:04Z' + timestamp: '2024-12-11T23:44:04Z' - torchscript_onnx_tflite: - inference_time: 1567.0 - throughput: 638.1620931716656 + inference_time: 1542.0 + throughput: 648.5084306095979 estimated_peak_memory_range: - min: 36864 - max: 14343760 + min: 16384 + max: 18381248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: jglvo2r85 + job_id: jgn6z4mm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1787.0 - throughput: 559.5970900951315 + inference_time: 1758.0 + 
throughput: 568.8282138794084 estimated_peak_memory_range: min: 753664 - max: 6491312 + max: 6765408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgdx8o1ep + job_id: jgo2lxndp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:17:06Z' + timestamp: '2024-12-11T23:44:06Z' - torchscript_onnx_tflite: - inference_time: 1406.0 - throughput: 711.2375533428165 + inference_time: 1412.0 + throughput: 708.2152974504249 estimated_peak_memory_range: min: 0 - max: 14996512 + max: 21023312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 77 - job_id: j56yrzl0p + job_id: jprvlr2eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1494.0 - throughput: 669.3440428380187 + inference_time: 1481.0 + throughput: 675.219446320054 estimated_peak_memory_range: min: 753664 - max: 17825392 + max: 19576128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5we8q635 + job_id: jpv6l8rm5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:17:07Z' + timestamp: '2024-12-11T23:44:07Z' - torchscript_onnx_qnn: - inference_time: 829.0 - throughput: 1206.2726176115802 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: min: 737280 max: 737280 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jped8w315 + job_id: jgke21qog job_status: Passed torchscript_onnx: - inference_time: 1142.0 - throughput: 875.6567425569177 + inference_time: 1171.0 + throughput: 853.9709649871904 
estimated_peak_memory_range: - min: 13287424 - max: 13287424 + min: 14557184 + max: 14557184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jp4lmvr85 + job_id: j5welk3j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:17:11Z' + timestamp: '2024-12-11T23:44:11Z' diff --git a/qai_hub_models/models/gear_guard_net_quantized/info.yaml b/qai_hub_models/models/gear_guard_net_quantized/info.yaml index 75ecac61..44f76b7f 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/info.yaml +++ b/qai_hub_models/models/gear_guard_net_quantized/info.yaml @@ -32,3 +32,4 @@ has_animated_banner: false license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: [] +labels_file: ppe_labels.txt diff --git a/qai_hub_models/models/gear_guard_net_quantized/perf.yaml b/qai_hub_models/models/gear_guard_net_quantized/perf.yaml index 231e1818..f16741c8 100644 --- a/qai_hub_models/models/gear_guard_net_quantized/perf.yaml +++ b/qai_hub_models/models/gear_guard_net_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: PPE-Detection-Quantized universal_assets: - torchscript_onnx_tflite: mno30z2vn - torchscript_onnx: mq8kx67jq + torchscript_onnx_tflite: mq36e5zrq + torchscript_onnx: mqp3zl5gm performance_metrics: - torchscript_onnx_tflite: - inference_time: 248.0 - throughput: 4032.2580645161293 + inference_time: 253.0 + throughput: 3952.5691699604745 estimated_peak_memory_range: - min: 0 - max: 24769056 + min: 12288 + max: 14742376 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgz38jek5 + job_id: jglvyd4l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 313.0 - throughput: 3194.888178913738 + inference_time: 311.0 
+ throughput: 3215.434083601286 estimated_peak_memory_range: - min: 28672 - max: 13729344 + min: 0 + max: 127689784 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpy1qjw7p + job_id: j57yem0r5 job_status: Passed torchscript_onnx: - inference_time: 605.0 - throughput: 1652.892561983471 + inference_time: 592.0 + throughput: 1689.1891891891892 estimated_peak_memory_range: - min: 16384 - max: 16078968 + min: 40960 + max: 9134176 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j5we83n65 + job_id: jglvyd6l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:16:23Z' + timestamp: '2024-12-11T23:43:22Z' - torchscript_onnx_tflite: - inference_time: 188.0 - throughput: 5319.148936170212 + inference_time: 190.0 + throughput: 5263.1578947368425 estimated_peak_memory_range: min: 12288 - max: 14529488 + max: 16772416 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j5we83v65 + job_id: j56y8x27p job_status: Passed torchscript_onnx_qnn: - inference_time: 228.0 - throughput: 4385.964912280701 + inference_time: 232.0 + throughput: 4310.3448275862065 estimated_peak_memory_range: min: 0 - max: 17003232 + max: 17462240 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp0zd2q65 + job_id: jp4ly7kl5 job_status: Passed torchscript_onnx: - inference_time: 432.0 - throughput: 2314.814814814815 + inference_time: 460.0 + throughput: 2173.913043478261 estimated_peak_memory_range: min: 0 - max: 52942192 + max: 52953296 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jg9lkyelg + job_id: j56y8xe7p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:16:25Z' + timestamp: '2024-12-11T23:43:25Z' - torchscript_onnx_tflite: - inference_time: 192.0 - throughput: 5208.333333333333 + inference_time: 182.0 + throughput: 5494.505494505494 estimated_peak_memory_range: - min: 8192 - max: 12007216 + min: 831488 + max: 13567664 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jg9lky1lg + job_id: jp3jzdnzg job_status: Passed torchscript_onnx_qnn: - inference_time: 224.0 - throughput: 4464.285714285715 + inference_time: 243.0 + throughput: 4115.22633744856 estimated_peak_memory_range: - min: 196608 - max: 13421728 + min: 0 + max: 13160960 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp8q6m9xp + job_id: jpxklqn95 job_status: Passed torchscript_onnx: - inference_time: 473.0 - throughput: 2114.164904862579 + inference_time: 477.0 + throughput: 2096.4360587002097 estimated_peak_memory_range: min: 0 - max: 29661888 + max: 29614944 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp147wx2p + job_id: jp3jzdvzg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:16:26Z' + timestamp: '2024-12-11T23:43:26Z' - torchscript_onnx_tflite: - inference_time: 1305.0 - throughput: 766.2835249042146 + inference_time: 1226.0 + throughput: 815.6606851549756 estimated_peak_memory_range: min: 12288 - max: 18696320 + 
max: 20889248 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp147wl2p + job_id: jgo2lxzdp job_status: Passed torchscript_onnx_qnn: - inference_time: 1941.0 - throughput: 515.1983513652756 + inference_time: 1760.0 + throughput: 568.1818181818181 estimated_peak_memory_range: - min: 200704 - max: 8413520 + min: 16384 + max: 7208992 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgkeoqn2g + job_id: j5mn07qqp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:16:13Z' + timestamp: '2024-12-11T23:43:13Z' - torchscript_onnx_tflite: - inference_time: 4985.0 - throughput: 200.60180541624874 + inference_time: 4862.0 + throughput: 205.67667626491155 estimated_peak_memory_range: - min: 12288 - max: 7031856 + min: 69632 + max: 3185952 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgdx8q9ep + job_id: jpv6l8qm5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:16:02Z' + timestamp: '2024-12-11T23:43:00Z' - torchscript_onnx_tflite: - inference_time: 248.0 - throughput: 4032.2580645161293 + inference_time: 247.0 + throughput: 4048.582995951417 estimated_peak_memory_range: min: 12288 - max: 13615312 + max: 13746304 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j57yklwl5 + job_id: jgjvr9d8g job_status: Passed torchscript_onnx_qnn: - inference_time: 302.0 - throughput: 3311.2582781456954 + inference_time: 303.0 + throughput: 
3300.3300330033003 estimated_peak_memory_range: - min: 217088 - max: 1368936 + min: 221184 + max: 1851136 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jglvo2z85 + job_id: jgn6z4lm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:16:14Z' + timestamp: '2024-12-11T23:43:14Z' - torchscript_onnx_tflite: - inference_time: 3889.0 - throughput: 257.1355104139882 + inference_time: 3915.0 + throughput: 255.4278416347382 estimated_peak_memory_range: - min: 40960 - max: 14028384 + min: 12288 + max: 17320480 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp4lmdov5 + job_id: jped7qo05 job_status: Passed torchscript_onnx_qnn: - inference_time: 4261.0 - throughput: 234.6866932644919 + inference_time: 4094.0 + throughput: 244.2598925256473 estimated_peak_memory_range: - min: 196608 - max: 5616112 + min: 200704 + max: 10673200 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp3jx13lg + job_id: jp2kr10mp job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:16:17Z' + timestamp: '2024-12-11T23:43:16Z' - torchscript_onnx_tflite: - inference_time: 255.0 - throughput: 3921.5686274509803 + inference_time: 250.0 + throughput: 4000.0 estimated_peak_memory_range: min: 12288 - max: 14233760 + max: 80427824 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jpxk36j15 + job_id: jgz3l6265 job_status: Passed torchscript_onnx_qnn: - inference_time: 308.0 - 
throughput: 3246.753246753247 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: - min: 245760 - max: 1592608 + min: 221184 + max: 1506080 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgo2on0xp + job_id: jpy1olr4p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:16:18Z' + timestamp: '2024-12-11T23:43:17Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: min: 12288 - max: 11569936 + max: 11752256 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j5mno62wp + job_id: j5welkwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 869.0 - throughput: 1150.7479861910242 + inference_time: 913.0 + throughput: 1095.290251916758 estimated_peak_memory_range: - min: 200704 - max: 6192064 + min: 0 + max: 5900992 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpv6eroj5 + job_id: jp0zmw3e5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:16:19Z' + timestamp: '2024-12-11T23:43:18Z' - torchscript_onnx_tflite: inference_time: 251.0 throughput: 3984.06374501992 estimated_peak_memory_range: - min: 16384 - max: 52449160 + min: 188416 + max: 96962520 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgn6omyr5 + job_id: jg9lzr0vg job_status: Passed torchscript_onnx_qnn: - inference_time: 306.0 - 
throughput: 3267.97385620915 + inference_time: 308.0 + throughput: 3246.753246753247 estimated_peak_memory_range: - min: 217088 - max: 1480424 + min: 212992 + max: 1428384 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgjvo2mxg + job_id: jp8qen08p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:16:20Z' + timestamp: '2024-12-11T23:43:19Z' - torchscript_onnx_tflite: - inference_time: 535.0 - throughput: 1869.1588785046729 + inference_time: 545.0 + throughput: 1834.8623853211009 estimated_peak_memory_range: min: 16384 - max: 11185776 + max: 16975696 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jprvo2q9g + job_id: jp14n92lp job_status: Passed torchscript_onnx_qnn: - inference_time: 739.0 - throughput: 1353.1799729364006 + inference_time: 822.0 + throughput: 1216.54501216545 estimated_peak_memory_range: - min: 204800 - max: 5972112 + min: 200704 + max: 6219776 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jped8w115 + job_id: jgke217og job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:16:21Z' + timestamp: '2024-12-11T23:43:20Z' - torchscript_onnx_tflite: - inference_time: 359.0 - throughput: 2785.515320334262 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: - min: 36864 - max: 16467344 + min: 0 + max: 22695040 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp2k4964p + job_id: jgdxdknlp 
job_status: Passed torchscript_onnx_qnn: - inference_time: 416.0 - throughput: 2403.846153846154 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 200704 - max: 18104464 + max: 17115920 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgz38j9k5 + job_id: j5q6lnemp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:16:22Z' + timestamp: '2024-12-11T23:43:21Z' - torchscript_onnx_qnn: - inference_time: 403.0 - throughput: 2481.3895781637716 + inference_time: 417.0 + throughput: 2398.0815347721823 estimated_peak_memory_range: - min: 405504 - max: 405504 + min: 462848 + max: 462848 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j56yrzj0p + job_id: jprvlr8eg job_status: Passed torchscript_onnx: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: - min: 8695808 - max: 8695808 + min: 8425472 + max: 8425472 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jgdx8qlep + job_id: jgo2lxkdp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:16:27Z' + timestamp: '2024-12-11T23:43:27Z' diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index b82bad3d..a5191eb2 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: GoogLeNet universal_assets: - 
torchscript_onnx_tflite: mmr62d90m - torchscript_onnx: mmdy8drkm + torchscript_onnx_tflite: mn1wzdvzm + torchscript_onnx: mmxe7l3en performance_metrics: - torchscript_onnx_tflite: - inference_time: 1022.0 - throughput: 978.4735812133073 + inference_time: 1016.0 + throughput: 984.2519685039371 estimated_peak_memory_range: - min: 0 - max: 4252248 + min: 16384 + max: 56500280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j56yrzk0p + job_id: jp0zmwz25 job_status: Passed torchscript_onnx_qnn: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1076.0 + throughput: 929.368029739777 estimated_peak_memory_range: - min: 40960 - max: 35584200 + min: 626688 + max: 6086360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgdx8qwep + job_id: jgz3l63z5 job_status: Passed torchscript_onnx: - inference_time: 1152.0 - throughput: 868.0555555555555 + inference_time: 1184.0 + throughput: 844.5945945945946 estimated_peak_memory_range: - min: 618496 - max: 2377560 + min: 0 + max: 49227160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgkeoqz2g + job_id: jpxklq995 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:15:25Z' + timestamp: '2024-12-11T23:42:26Z' - torchscript_onnx_tflite: inference_time: 640.0 throughput: 1562.5 estimated_peak_memory_range: min: 16384 - max: 16553248 + max: 19681648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp3jx1ylg + job_id: jp8qenqzp job_status: Passed torchscript_onnx_qnn: - inference_time: 691.0 - throughput: 
1447.178002894356 + inference_time: 686.0 + throughput: 1457.725947521866 estimated_peak_memory_range: min: 618496 - max: 17770048 + max: 19889376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j57yklzl5 + job_id: j5welkez5 job_status: Passed torchscript_onnx: - inference_time: 784.0 - throughput: 1275.5102040816328 + inference_time: 792.0 + throughput: 1262.6262626262626 estimated_peak_memory_range: min: 0 - max: 54659056 + max: 59714272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5q6zr84p + job_id: j5mn07eqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:15:26Z' + timestamp: '2024-12-11T23:42:27Z' - torchscript_onnx_tflite: - inference_time: 695.0 - throughput: 1438.8489208633093 + inference_time: 678.0 + throughput: 1474.9262536873157 estimated_peak_memory_range: min: 12288 - max: 12466800 + max: 14630304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgo2onjxp + job_id: jgke21eyg job_status: Passed torchscript_onnx_qnn: - inference_time: 724.0 - throughput: 1381.2154696132598 + inference_time: 728.0 + throughput: 1373.6263736263736 estimated_peak_memory_range: min: 614400 - max: 12549616 + max: 16149984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp4lmdqv5 + job_id: jg9lzrlqg job_status: Passed torchscript_onnx: - inference_time: 845.0 - throughput: 1183.4319526627219 + inference_time: 849.0 + throughput: 1177.8563015312131 estimated_peak_memory_range: min: 0 - max: 20981888 + max: 21986608 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jglvo2n85 + job_id: jgn6z40m5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:15:27Z' + timestamp: '2024-12-11T23:42:28Z' - torchscript_onnx_tflite: inference_time: 1014.0 throughput: 986.1932938856016 estimated_peak_memory_range: min: 16384 - max: 56632816 + max: 4180968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jpv6erjj5 + job_id: jglvydve5 job_status: Passed torchscript_onnx_qnn: - inference_time: 900.0 - throughput: 1111.111111111111 + inference_time: 895.0 + throughput: 1117.31843575419 estimated_peak_memory_range: - min: 659456 - max: 1960488 + min: 630784 + max: 1771600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jpxk36v15 + job_id: jp14n94kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:15:16Z' + timestamp: '2024-12-11T23:42:16Z' - torchscript_onnx_tflite: - inference_time: 36255.0 - throughput: 27.582402427251413 + inference_time: 36247.0 + throughput: 27.588490081937817 estimated_peak_memory_range: min: 61440 - max: 12800032 + max: 14805792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgjvo2jxg + job_id: j56y8xyvp job_status: Passed torchscript_onnx_qnn: - inference_time: 36494.0 - throughput: 27.401764673644983 + inference_time: 36425.0 + throughput: 27.45367192862045 estimated_peak_memory_range: - min: 638976 - max: 6054336 + min: 602112 + max: 11183888 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgn6om2r5 + job_id: j5welkej5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:15:18Z' + timestamp: '2024-12-11T23:42:18Z' - torchscript_onnx_tflite: - inference_time: 1016.0 - throughput: 984.2519685039371 + inference_time: 1018.0 + throughput: 982.3182711198428 estimated_peak_memory_range: - min: 20480 - max: 5196416 + min: 16384 + max: 56938688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jped8wj15 + job_id: jp3jzdjxg job_status: Passed torchscript_onnx_qnn: - inference_time: 903.0 - throughput: 1107.4197120708748 + inference_time: 901.0 + throughput: 1109.8779134295228 estimated_peak_memory_range: min: 634880 - max: 2027744 + max: 2128576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jprvo2k9g + job_id: jg9lzrlvg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:15:19Z' + timestamp: '2024-12-11T23:42:20Z' - torchscript_onnx_tflite: - inference_time: 2012.0 - throughput: 497.0178926441352 + inference_time: 1997.0 + throughput: 500.75112669003505 estimated_peak_memory_range: min: 16384 - max: 10260352 + max: 10447120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jgz38j1k5 + job_id: jgo2lx24p job_status: Passed torchscript_onnx_qnn: - inference_time: 1991.0 - throughput: 502.26017076845807 + inference_time: 1977.0 + throughput: 505.8168942842691 estimated_peak_memory_range: 
min: 0 - max: 5988208 + max: 5934912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp2k4984p + job_id: jp14n94lp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:15:20Z' + timestamp: '2024-12-11T23:42:21Z' - torchscript_onnx_tflite: - inference_time: 1021.0 - throughput: 979.4319294809011 + inference_time: 1020.0 + throughput: 980.3921568627451 estimated_peak_memory_range: - min: 20480 - max: 55652880 + min: 16384 + max: 56411592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j5we83o65 + job_id: jpv6l8675 job_status: Passed torchscript_onnx_qnn: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 908.0 + throughput: 1101.3215859030836 estimated_peak_memory_range: min: 638976 - max: 1835640 + max: 1886504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jpy1qje7p + job_id: jgdxdkxlp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:15:22Z' + timestamp: '2024-12-11T23:42:22Z' - torchscript_onnx_tflite: - inference_time: 2192.0 - throughput: 456.2043795620438 + inference_time: 2166.0 + throughput: 461.6805170821791 estimated_peak_memory_range: - min: 20480 - max: 11720208 + min: 16384 + max: 16185648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jg9lkyvlg + job_id: jgjvr9v7g job_status: Passed torchscript_onnx_qnn: - inference_time: 2125.0 - throughput: 470.5882352941176 + inference_time: 2138.0 + 
throughput: 467.7268475210477 estimated_peak_memory_range: - min: 622592 - max: 6389392 + min: 0 + max: 5991520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp0zd2y65 + job_id: j57yem2r5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:15:23Z' + timestamp: '2024-12-11T23:42:23Z' - torchscript_onnx_tflite: - inference_time: 1496.0 - throughput: 668.4491978609626 + inference_time: 1494.0 + throughput: 669.3440428380187 estimated_peak_memory_range: - min: 16384 - max: 17059344 + min: 20480 + max: 21707872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jp147w02p + job_id: jped7qd75 job_status: Passed torchscript_onnx_qnn: inference_time: 1565.0 throughput: 638.9776357827476 estimated_peak_memory_range: min: 618496 - max: 17968576 + max: 24872992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp8q6moxp + job_id: jp4ly7nl5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:15:24Z' + timestamp: '2024-12-11T23:42:24Z' - torchscript_onnx_qnn: - inference_time: 1051.0 - throughput: 951.4747859181732 + inference_time: 1072.0 + throughput: 932.8358208955224 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5mno6rwp + job_id: jgdxdkxkp job_status: Passed torchscript_onnx: - inference_time: 1291.0 - throughput: 774.5933384972889 + inference_time: 1314.0 + throughput: 761.03500761035 estimated_peak_memory_range: - min: 14536704 - max: 
14536704 + min: 14475264 + max: 14475264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j56yrz60p + job_id: jprvlr6eg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:15:29Z' + timestamp: '2024-12-11T23:42:29Z' diff --git a/qai_hub_models/models/googlenet_quantized/evaluate.py b/qai_hub_models/models/googlenet_quantized/evaluate.py index 57156cc6..94a5ccc1 100644 --- a/qai_hub_models/models/googlenet_quantized/evaluate.py +++ b/qai_hub_models/models/googlenet_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index 89eb62c8..319f8042 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/googlenet_quantized/info.yaml b/qai_hub_models/models/googlenet_quantized/info.yaml index fde766e8..db4f2d75 100644 --- a/qai_hub_models/models/googlenet_quantized/info.yaml +++ b/qai_hub_models/models/googlenet_quantized/info.yaml @@ -10,6 +10,7 @@ description: GoogLeNet is a machine learning model that can classify images from use_case: Image Classification tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1409.4842 research_paper_title: Going Deeper with 
Convolutions license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index 1afe6bb1..0ada81ee 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -50,15 +50,14 @@ aggregated: models: - name: GoogLeNetQuantized universal_assets: - torchscript_onnx_tflite: mqkkx3e7q - torchscript_onnx: mnlvg3rem + torchscript_onnx_tflite: mq36e2j6q performance_metrics: - torchscript_onnx_tflite: - inference_time: 282.0 - throughput: 3546.099290780142 + inference_time: 283.0 + throughput: 3533.5689045936397 estimated_peak_memory_range: - min: 20480 - max: 10057640 + min: 12288 + max: 77077648 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp0zdedn5 + job_id: jgke2n1ng job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 345.0 + throughput: 2898.550724637681 estimated_peak_memory_range: - min: 28672 - max: 9796728 + min: 12288 + max: 115677360 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5we8m1m5 + job_id: jp14nlr7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:02:51Z' + timestamp: '2024-12-12T01:31:21Z' - torchscript_onnx_tflite: - inference_time: 209.0 - throughput: 4784.688995215311 + inference_time: 207.0 + throughput: 4830.917874396136 estimated_peak_memory_range: - min: 0 - max: 15841472 + min: 12288 + max: 17699472 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp8q6w6op + job_id: j5q6lknop 
job_status: Passed torchscript_onnx_qnn: - inference_time: 253.0 - throughput: 3952.5691699604745 + inference_time: 249.0 + throughput: 4016.0642570281125 estimated_peak_memory_range: min: 0 - max: 13733936 + max: 17621504 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jg9lk9x8g + job_id: jgdxd9jzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:02:53Z' + timestamp: '2024-12-12T01:31:23Z' - torchscript_onnx_tflite: - inference_time: 216.0 - throughput: 4629.62962962963 + inference_time: 181.0 + throughput: 5524.861878453039 estimated_peak_memory_range: - min: 28672 - max: 10938384 + min: 12288 + max: 10955968 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgkeorong + job_id: jglvyzjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 253.0 - throughput: 3952.5691699604745 + inference_time: 256.0 + throughput: 3906.25 estimated_peak_memory_range: min: 0 - max: 11570672 + max: 11018080 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp147qv7p + job_id: j57yewq95 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:02:54Z' + timestamp: '2024-12-12T01:31:25Z' - torchscript_onnx_tflite: - inference_time: 885.0 - throughput: 1129.9435028248588 + inference_time: 968.0 + throughput: 1033.0578512396694 estimated_peak_memory_range: min: 12288 - max: 14688320 + max: 19067008 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 86 - job_id: j5q6z9zop + job_id: j56y8jkyp job_status: Passed torchscript_onnx_qnn: - inference_time: 1201.0 - throughput: 832.6394671107411 + inference_time: 1206.0 + throughput: 829.1873963515754 estimated_peak_memory_range: - min: 12288 - max: 8013440 + min: 163840 + max: 12060688 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgdx87zzp + job_id: jp4lyoz15 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -204,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:02:33Z' + timestamp: '2024-12-12T01:31:27Z' - torchscript_onnx_tflite: - inference_time: 5722.0 - throughput: 174.76406850751485 + inference_time: 5779.0 + throughput: 173.04031839418585 estimated_peak_memory_range: - min: 49152 - max: 7709672 + min: 12288 + max: 2478648 primary_compute_unit: NPU precision: int8 layer_info: @@ -218,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jglvoe1m5 + job_id: jp3jz3yng job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -227,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T04:02:13Z' + timestamp: '2024-12-12T01:31:07Z' - torchscript_onnx_tflite: - inference_time: 283.0 - throughput: 3533.5689045936397 + inference_time: 288.0 + throughput: 3472.222222222222 estimated_peak_memory_range: - min: 12288 - max: 9799000 + min: 16384 + max: 5124360 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j56yrqdyp + job_id: jgo2l0jkp job_status: Passed torchscript_onnx_qnn: - inference_time: 295.0 - throughput: 3389.830508474576 + inference_time: 305.0 + throughput: 3278.688524590164 estimated_peak_memory_range: min: 184320 - max: 1751152 + max: 1807536 primary_compute_unit: NPU precision: int8 
layer_info: @@ -256,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5we8m145 + job_id: jpxkljwl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -265,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:02:35Z' + timestamp: '2024-12-12T01:31:29Z' - torchscript_onnx_tflite: - inference_time: 3761.0 - throughput: 265.8867322520606 + inference_time: 3753.0 + throughput: 266.4535038635758 estimated_peak_memory_range: - min: 12288 - max: 11992832 + min: 16384 + max: 14807232 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jp3jxqwng + job_id: jpv6lojr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3969.0 - throughput: 251.95263290501387 + inference_time: 3940.0 + throughput: 253.80710659898477 estimated_peak_memory_range: - min: 86016 - max: 5873728 + min: 163840 + max: 10658640 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp147qvnp + job_id: jgn6zyjq5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -303,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:02:38Z' + timestamp: '2024-12-12T01:31:32Z' - torchscript_onnx_tflite: - inference_time: 285.0 - throughput: 3508.7719298245615 + inference_time: 281.0 + throughput: 3558.7188612099644 estimated_peak_memory_range: - min: 0 - max: 119715376 + min: 28672 + max: 10114656 primary_compute_unit: NPU precision: int8 layer_info: @@ -317,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgo2oe4kp + job_id: jgjvrmjeg job_status: Passed torchscript_onnx_qnn: - inference_time: 305.0 - throughput: 3278.688524590164 + inference_time: 306.0 + throughput: 3267.97385620915 estimated_peak_memory_range: min: 184320 - 
max: 1304712 + max: 1497944 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jgdx87z6p + job_id: jprvlqz7g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -341,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:02:40Z' + timestamp: '2024-12-12T01:31:34Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 667.0 + throughput: 1499.2503748125937 estimated_peak_memory_range: - min: 12288 - max: 10261184 + min: 16384 + max: 10137648 primary_compute_unit: NPU precision: int8 layer_info: @@ -355,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jpv6ez9r5 + job_id: jped71jv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 849.0 - throughput: 1177.8563015312131 + inference_time: 1089.0 + throughput: 918.2736455463728 estimated_peak_memory_range: min: 0 - max: 5825888 + max: 5960480 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j57ykv7n5 + job_id: jp2kr62qp job_status: Passed reference_device_info: name: SA8295P ADP @@ -379,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:02:42Z' + timestamp: '2024-12-12T01:31:36Z' - torchscript_onnx_tflite: - inference_time: 292.0 - throughput: 3424.6575342465753 + inference_time: 279.0 + throughput: 3584.2293906810037 estimated_peak_memory_range: - min: 16384 - max: 107113712 + min: 12288 + max: 5202584 primary_compute_unit: NPU precision: int8 layer_info: @@ -393,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgjvokweg + job_id: jgz3l91x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 305.0 - throughput: 3278.688524590164 + inference_time: 304.0 + throughput: 
3289.4736842105262 estimated_peak_memory_range: - min: 188416 - max: 1734832 + min: 176128 + max: 1384752 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jp4lmj925 + job_id: jpy1ow9lp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -417,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:02:44Z' + timestamp: '2024-12-12T01:31:38Z' - torchscript_onnx_tflite: - inference_time: 579.0 - throughput: 1727.1157167530225 + inference_time: 576.0 + throughput: 1736.111111111111 estimated_peak_memory_range: - min: 16384 - max: 10177536 + min: 12288 + max: 16400176 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jped84lv5 + job_id: j5welvjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 741.0 - throughput: 1349.527665317139 + inference_time: 777.0 + throughput: 1287.001287001287 estimated_peak_memory_range: - min: 0 - max: 5855392 + min: 163840 + max: 6117664 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jpxk3ed85 + job_id: jp0zmqnn5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -455,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:02:46Z' + timestamp: '2024-12-12T01:31:40Z' - torchscript_onnx_tflite: - inference_time: 349.0 - throughput: 2865.3295128939826 + inference_time: 343.0 + throughput: 2915.451895043732 estimated_peak_memory_range: - min: 16384 - max: 15024832 + min: 0 + max: 16507344 primary_compute_unit: NPU precision: int8 layer_info: @@ -469,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jgz38v4x5 + job_id: jg9lz168g job_status: Passed torchscript_onnx_qnn: - inference_time: 
413.0 - throughput: 2421.3075060532688 + inference_time: 411.0 + throughput: 2433.0900243309 estimated_peak_memory_range: - min: 167936 - max: 16427504 + min: 163840 + max: 19048144 primary_compute_unit: NPU precision: int8 layer_info: @@ -484,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j5mnovd7p + job_id: jp8qe9lop job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -493,10 +492,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:02:48Z' + timestamp: '2024-12-12T01:31:42Z' - torchscript_onnx_qnn: - inference_time: 408.0 - throughput: 2450.9803921568628 + inference_time: 426.0 + throughput: 2347.417840375587 estimated_peak_memory_range: min: 499712 max: 499712 @@ -507,22 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jg9lk9xmg - job_status: Passed - torchscript_onnx: - inference_time: 150249.0 - throughput: 6.655618340221898 - estimated_peak_memory_range: - min: 60461056 - max: 60461056 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 319 - layers_on_gpu: 0 - layers_on_cpu: 57 - total_layers: 376 - job_id: jpy1qv70p + job_id: j5mn02j9p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -531,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:02:56Z' + timestamp: '2024-12-12T01:31:31Z' diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index 64dfa338..9d5b8ae7 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: HRNetPose universal_assets: - torchscript_onnx_tflite: mnjxkp61q - torchscript_onnx: mn7lk6ojq + torchscript_onnx_tflite: mn4l105vq + torchscript_onnx: mno63lekn performance_metrics: - torchscript_onnx_tflite: - inference_time: 2853.0 - throughput: 
350.5082369435682 + inference_time: 2849.0 + throughput: 351.000351000351 estimated_peak_memory_range: - min: 16384 - max: 61536624 + min: 20480 + max: 62191712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jp8q6mn8p + job_id: j57ye13q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2959.0 - throughput: 337.95201081446436 + inference_time: 2951.0 + throughput: 338.8681802778719 estimated_peak_memory_range: - min: 45056 - max: 27175656 + min: 614400 + max: 27488280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j5we83kj5 + job_id: jgke21xyg job_status: Passed torchscript_onnx: - inference_time: 2914.0 - throughput: 343.17089910775564 + inference_time: 2932.0 + throughput: 341.06412005457025 estimated_peak_memory_range: - min: 12288 - max: 623963832 + min: 16384 + max: 60826792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: j5mno6jwp + job_id: jgdxdk1kp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:14:39Z' + timestamp: '2024-12-11T23:41:39Z' - torchscript_onnx_tflite: - inference_time: 2051.0 - throughput: 487.56704046806436 + inference_time: 2072.0 + throughput: 482.6254826254826 estimated_peak_memory_range: - min: 16384 - max: 38770592 + min: 45056 + max: 40572928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgkeoq1og + job_id: jp4ly60q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2176.0 - throughput: 459.55882352941177 + inference_time: 2118.0 + throughput: 472.14353163361665 estimated_peak_memory_range: min: 0 
- max: 32977744 + max: 38240592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jg9lkyrvg + job_id: jglvydme5 job_status: Passed torchscript_onnx: - inference_time: 2200.0 - throughput: 454.54545454545456 + inference_time: 2159.0 + throughput: 463.1773969430292 estimated_peak_memory_range: - min: 606208 - max: 158429088 + min: 0 + max: 156061904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jgn6omjr5 + job_id: j57yemrq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:14:41Z' + timestamp: '2024-12-11T23:41:40Z' - torchscript_onnx_tflite: - inference_time: 1689.0 - throughput: 592.0663114268798 + inference_time: 1964.0 + throughput: 509.1649694501018 estimated_peak_memory_range: min: 12288 - max: 35097232 + max: 37478768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j5q6zrnmp + job_id: jpxkl82j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2036.0 - throughput: 491.1591355599214 + inference_time: 2034.0 + throughput: 491.6420845624385 estimated_peak_memory_range: min: 0 - max: 33603472 + max: 35623104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp147w9lp + job_id: j56y8x4vp job_status: Passed torchscript_onnx: - inference_time: 2141.0 - throughput: 467.07146193367583 + inference_time: 2138.0 + throughput: 467.7268475210477 estimated_peak_memory_range: min: 0 - max: 75561472 + max: 77135232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 
- job_id: jprvo2z9g + job_id: jp4ly7rq5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:14:42Z' + timestamp: '2024-12-11T23:41:41Z' - torchscript_onnx_tflite: - inference_time: 2818.0 - throughput: 354.86160397444996 + inference_time: 2817.0 + throughput: 354.98757543485976 estimated_peak_memory_range: - min: 225280 - max: 51171320 + min: 24576 + max: 40368032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jglvo2dl5 + job_id: j5mn01yyp job_status: Passed torchscript_onnx_qnn: - inference_time: 2706.0 - throughput: 369.5491500369549 + inference_time: 2751.0 + throughput: 363.50418029807344 estimated_peak_memory_range: - min: 618496 - max: 1946200 + min: 626688 + max: 1802176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jgdx8qklp + job_id: jgo2lx64p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:14:30Z' - - torchscript_onnx_qnn: - inference_time: 103120.0 - throughput: 9.69743987587277 + timestamp: '2024-12-11T23:41:30Z' + - torchscript_onnx_tflite: + inference_time: 103088.0 + throughput: 9.70045010088468 + estimated_peak_memory_range: + min: 0 + max: 34837824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 516 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 516 + job_id: jgn6z4vv5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 103096.0 + throughput: 9.699697369442074 estimated_peak_memory_range: - min: 1073152 - max: 6852352 + min: 733184 + max: 11050896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +272,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jg9lky6lg + job_id: jgjvr9n7g job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:14:32Z' + timestamp: '2024-12-11T23:41:32Z' - torchscript_onnx_tflite: - inference_time: 2834.0 - throughput: 352.85815102328866 + inference_time: 2828.0 + throughput: 353.6067892503536 estimated_peak_memory_range: - min: 20480 - max: 40700504 + min: 16384 + max: 51094136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgo2onxdp + job_id: jprvlr3vg job_status: Passed torchscript_onnx_qnn: inference_time: 2743.0 throughput: 364.5643456069996 estimated_peak_memory_range: - min: 643072 - max: 2011064 + min: 659456 + max: 1821664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp147wr2p + job_id: jped7qm75 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:14:33Z' + timestamp: '2024-12-11T23:41:33Z' - torchscript_onnx_tflite: - inference_time: 4653.0 - throughput: 214.9151085321298 + inference_time: 4613.0 + throughput: 216.77866897897246 estimated_peak_memory_range: - min: 45056 - max: 53135680 + min: 16384 + max: 29249216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgn691xr5 + job_id: jp2kr1yxp job_status: Passed torchscript_onnx_qnn: - inference_time: 5172.0 - throughput: 193.34880123743233 + inference_time: 4705.0 + throughput: 212.53985122210415 estimated_peak_memory_range: - min: 643072 - max: 6389472 + min: 606208 + max: 6763456 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jgdx8qjep + job_id: jgz3l6dz5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:14:35Z' + timestamp: '2024-12-11T23:41:34Z' - torchscript_onnx_tflite: - inference_time: 2855.0 - throughput: 350.2626970227671 + inference_time: 2814.0 + throughput: 355.36602700781805 estimated_peak_memory_range: min: 20480 - max: 61266688 + max: 28098600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgjvo298g + job_id: jpy1ol3rp job_status: Passed torchscript_onnx_qnn: - inference_time: 2713.0 - throughput: 368.59565057132323 + inference_time: 2757.0 + throughput: 362.71309394269133 estimated_peak_memory_range: - min: 622592 - max: 1885000 + min: 663552 + max: 1873936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j57yklql5 + job_id: j5welk6z5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:14:36Z' + timestamp: '2024-12-11T23:41:36Z' - torchscript_onnx_tflite: - inference_time: 5449.0 - throughput: 183.5199119104423 + inference_time: 5440.0 + throughput: 183.8235294117647 estimated_peak_memory_range: - min: 36864 - max: 34190928 + min: 16384 + max: 34786112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jped8wq05 + job_id: jp0zmw025 job_status: Passed torchscript_onnx_qnn: - inference_time: 5458.0 - throughput: 183.21729571271527 + inference_time: 5505.0 + throughput: 181.65304268846504 estimated_peak_memory_range: min: 606208 - max: 
6586352 + max: 11314720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jp4lmdzv5 + job_id: jg9lzrnqg job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:14:37Z' + timestamp: '2024-12-11T23:41:37Z' - torchscript_onnx_tflite: - inference_time: 3800.0 - throughput: 263.1578947368421 + inference_time: 3812.0 + throughput: 262.3294858342078 estimated_peak_memory_range: - min: 16384 - max: 31399760 + min: 32768 + max: 35722384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jgz38j665 + job_id: jp8qenyzp job_status: Passed torchscript_onnx_qnn: - inference_time: 3800.0 - throughput: 263.1578947368421 + inference_time: 3936.0 + throughput: 254.0650406504065 estimated_peak_memory_range: - min: 606208 - max: 29948272 + min: 409600 + max: 30917440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jpxk36w15 + job_id: jp14n9zkp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:14:38Z' + timestamp: '2024-12-11T23:41:38Z' - torchscript_onnx_qnn: - inference_time: 3023.0 - throughput: 330.7972213033411 + inference_time: 2961.0 + throughput: 337.7237419790611 estimated_peak_memory_range: min: 589824 max: 589824 @@ -470,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j5we83j65 + job_id: jpv6l8k75 job_status: Passed torchscript_onnx: - inference_time: 3040.0 - throughput: 328.94736842105266 + inference_time: 2984.0 + throughput: 335.1206434316354 estimated_peak_memory_range: - min: 58368000 - max: 58368000 + 
min: 60174336 + max: 60174336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jp2k4924p + job_id: jpxklqoj5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:14:43Z' + timestamp: '2024-12-11T23:41:42Z' diff --git a/qai_hub_models/models/hrnet_pose_quantized/info.yaml b/qai_hub_models/models/hrnet_pose_quantized/info.yaml index 8b206085..65f6d546 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/info.yaml +++ b/qai_hub_models/models/hrnet_pose_quantized/info.yaml @@ -8,6 +8,7 @@ use_case: Pose Estimation description: HRNet performs pose estimation in high-resolution representations. tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1902.09212 research_paper_title: Deep High-Resolution Representation Learning for Human Pose Estimation diff --git a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml index 63fa317f..69b10c43 100644 --- a/qai_hub_models/models/hrnet_pose_quantized/perf.yaml +++ b/qai_hub_models/models/hrnet_pose_quantized/perf.yaml @@ -47,14 +47,14 @@ aggregated: models: - name: HRNetPoseQuantized universal_assets: - torchscript_onnx_tflite: mn41dwrrn + torchscript_onnx_tflite: mn0jx1v9m performance_metrics: - torchscript_onnx_tflite: - inference_time: 970.0 - throughput: 1030.9278350515465 + inference_time: 950.0 + throughput: 1052.6315789473683 estimated_peak_memory_range: - min: 20480 - max: 22541008 + min: 16384 + max: 23331688 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jpxk36895 + job_id: jp3jzl3xg job_status: Passed torchscript_onnx_qnn: - inference_time: 1251.0 - throughput: 799.3605115907275 + inference_time: 1240.0 + 
throughput: 806.4516129032259 estimated_peak_memory_range: - min: 16384 - max: 24626392 + min: 12288 + max: 11140432 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jp3jx1lzg + job_id: j5mn012yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:13:30Z' + timestamp: '2024-12-11T23:40:30Z' - torchscript_onnx_tflite: - inference_time: 690.0 - throughput: 1449.2753623188405 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: min: 16384 - max: 36284176 + max: 41065472 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: j5mno61qp + job_id: jgo2l704p job_status: Passed torchscript_onnx_qnn: - inference_time: 912.0 - throughput: 1096.4912280701753 + inference_time: 915.0 + throughput: 1092.896174863388 estimated_peak_memory_range: min: 0 - max: 35724768 + max: 35838000 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgo2on7dp + job_id: jgn6zd8v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:13:32Z' + timestamp: '2024-12-11T23:40:32Z' - torchscript_onnx_tflite: - inference_time: 647.0 - throughput: 1545.595054095827 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: - min: 12288 - max: 34559664 + min: 8192 + max: 37426224 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jgn6omdm5 + job_id: jpv6lyo75 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 870.0 - throughput: 1149.4252873563219 + inference_time: 888.0 + throughput: 1126.126126126126 estimated_peak_memory_range: min: 0 - max: 32210784 + max: 33571248 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jpv6erym5 + job_id: jprvlmjvg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:13:33Z' + timestamp: '2024-12-11T23:40:33Z' - torchscript_onnx_tflite: - inference_time: 3657.0 - throughput: 273.4481815695926 + inference_time: 3735.0 + throughput: 267.7376171352075 estimated_peak_memory_range: min: 12288 - max: 42870832 + max: 46275920 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jprvo2meg + job_id: jgjvr6m7g job_status: Passed torchscript_onnx_qnn: - inference_time: 5379.0 - throughput: 185.90816136828406 + inference_time: 5349.0 + throughput: 186.9508319312021 estimated_peak_memory_range: - min: 204800 - max: 7882496 + min: 163840 + max: 7679280 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgjvo268g + job_id: jp2krqnxp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T02:13:34Z' + timestamp: '2024-12-11T23:40:34Z' - torchscript_onnx_tflite: - inference_time: 17037.0 - throughput: 58.69577977343429 + inference_time: 17373.0 + throughput: 57.560582513095035 estimated_peak_memory_range: - min: 102400 - max: 6897968 + min: 61440 + max: 2970144 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +214,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - 
job_id: jp2k49qmp + job_id: jped70175 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -223,13 +223,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:13:22Z' + timestamp: '2024-12-11T23:40:21Z' - torchscript_onnx_tflite: - inference_time: 952.0 - throughput: 1050.420168067227 + inference_time: 954.0 + throughput: 1048.2180293501049 estimated_peak_memory_range: min: 16384 - max: 18186840 + max: 12037104 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,14 +237,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jpy1qjk4p + job_id: jgz3lq9z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1215.0 - throughput: 823.0452674897119 + inference_time: 1218.0 + throughput: 821.0180623973728 estimated_peak_memory_range: min: 180224 - max: 1757832 + max: 1844512 primary_compute_unit: NPU precision: int8 layer_info: @@ -252,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jped8w005 + job_id: jpy1ok0rp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -261,13 +261,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:13:36Z' - - torchscript_onnx_qnn: - inference_time: 14378.0 - throughput: 69.55070246209486 + timestamp: '2024-12-11T23:40:35Z' + - torchscript_onnx_tflite: + inference_time: 13862.0 + throughput: 72.13966238638004 + estimated_peak_memory_range: + min: 40960 + max: 34261808 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 518 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 518 + job_id: j5wel0vz5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 14372.0 + throughput: 69.57973838018368 estimated_peak_memory_range: - min: 143360 - max: 5811632 + min: 118784 + max: 10789520 primary_compute_unit: NPU precision: int8 layer_info: @@ -275,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - 
job_id: j5we830j5 + job_id: jp8qedvzp job_status: Passed reference_device_info: name: SA7255P ADP @@ -284,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:13:38Z' + timestamp: '2024-12-11T23:40:38Z' - torchscript_onnx_tflite: - inference_time: 954.0 - throughput: 1048.2180293501049 + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: - min: 20480 - max: 12827912 + min: 16384 + max: 20366920 primary_compute_unit: NPU precision: int8 layer_info: @@ -298,14 +313,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jp8q6md8p + job_id: jg9lz71qg job_status: Passed torchscript_onnx_qnn: - inference_time: 1224.0 - throughput: 816.9934640522875 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: - min: 184320 - max: 1433896 + min: 196608 + max: 1412496 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,7 +328,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jg9lky7vg + job_id: jgke2wmyg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -322,13 +337,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:13:39Z' + timestamp: '2024-12-11T23:40:39Z' - torchscript_onnx_tflite: - inference_time: 1662.0 - throughput: 601.6847172081829 + inference_time: 1675.0 + throughput: 597.0149253731344 estimated_peak_memory_range: min: 16384 - max: 32211488 + max: 37915952 primary_compute_unit: NPU precision: int8 layer_info: @@ -336,14 +351,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jgkeoqwog + job_id: jp14nklkp job_status: Passed torchscript_onnx_qnn: inference_time: 1997.0 throughput: 500.75112669003505 estimated_peak_memory_range: min: 0 - max: 5908352 + max: 6313040 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,7 +366,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: 
jp147wklp + job_id: j5q6lxo7p job_status: Passed reference_device_info: name: SA8295P ADP @@ -360,13 +375,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:13:41Z' + timestamp: '2024-12-11T23:40:40Z' - torchscript_onnx_tflite: - inference_time: 957.0 - throughput: 1044.932079414838 + inference_time: 951.0 + throughput: 1051.5247108307046 estimated_peak_memory_range: - min: 16384 - max: 10596960 + min: 12288 + max: 16595984 primary_compute_unit: NPU precision: int8 layer_info: @@ -374,14 +389,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: j5q6zrxmp + job_id: jgdxdy9kp job_status: Passed torchscript_onnx_qnn: - inference_time: 1220.0 - throughput: 819.672131147541 + inference_time: 1211.0 + throughput: 825.7638315441784 estimated_peak_memory_range: - min: 192512 - max: 1482408 + min: 204800 + max: 1436816 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,7 +404,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgdx8qylp + job_id: jglvy9re5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -398,13 +413,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:13:42Z' + timestamp: '2024-12-11T23:40:41Z' - torchscript_onnx_tflite: - inference_time: 1452.0 - throughput: 688.7052341597796 + inference_time: 1455.0 + throughput: 687.2852233676975 estimated_peak_memory_range: min: 16384 - max: 34030944 + max: 37604112 primary_compute_unit: NPU precision: int8 layer_info: @@ -412,14 +427,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: jglvo29l5 + job_id: jp4ly6oq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1902.0 - throughput: 525.7623554153523 + inference_time: 1913.0 + throughput: 522.7391531625718 estimated_peak_memory_range: - min: 167936 - max: 5833664 + min: 163840 + max: 6321904 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,7 +442,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: j57yklmr5 + job_id: j56y89lvp job_status: Passed reference_device_info: name: SA8775P ADP @@ -436,13 +451,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:13:43Z' + timestamp: '2024-12-11T23:40:42Z' - torchscript_onnx_tflite: - inference_time: 1219.0 - throughput: 820.3445447087777 + inference_time: 1216.0 + throughput: 822.3684210526316 estimated_peak_memory_range: min: 16384 - max: 39082800 + max: 44890240 primary_compute_unit: NPU precision: int8 layer_info: @@ -450,14 +465,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 518 - job_id: j56yrz97p + job_id: jpxkl8jj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1514.0 - throughput: 660.5019815059445 + inference_time: 1481.0 + throughput: 675.219446320054 estimated_peak_memory_range: - min: 163840 - max: 39280176 + min: 167936 + max: 40778704 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,7 +480,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jp4lmd7l5 + job_id: jp3jzl2xg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -474,13 +489,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:13:44Z' + timestamp: '2024-12-11T23:40:43Z' - torchscript_onnx_qnn: - inference_time: 1372.0 - throughput: 728.862973760933 + inference_time: 1347.0 + throughput: 742.3904974016333 estimated_peak_memory_range: - min: 307200 - max: 307200 + min: 294912 + max: 294912 primary_compute_unit: NPU precision: int8 layer_info: @@ -488,7 +503,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 748 - job_id: jgz38jq65 + job_id: jp0zm8725 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -497,4 +512,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:13:37Z' + timestamp: '2024-12-11T23:40:36Z' diff 
--git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 1559233b..99cf89fa 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -202,7 +202,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_onnx=False) + parser = export_parser(model_cls=Model, supports_qnn=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py index 1bb076a2..5a1ffe7f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/model.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/model.py @@ -10,7 +10,7 @@ from transformers import WavLMModel from transformers.models.wavlm.modeling_wavlm import WavLMGroupNormConvLayer -from qai_hub_models.utils.base_model import BaseModel, TargetRuntime +from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec OPENPOSE_SOURCE_REPOSITORY = ( @@ -78,19 +78,6 @@ def get_input_spec( def get_output_names() -> list[str]: return ["feature_vector_1", "feature_vector_2"] - def get_hub_profile_options( - self, target_runtime: TargetRuntime, other_profile_options: str = "" - ) -> str: - profile_options = super().get_hub_profile_options( - target_runtime, other_profile_options - ) - if ( - target_runtime == TargetRuntime.TFLITE - and "--compute_unit" not in profile_options - ): - profile_options = profile_options + " --compute_unit gpu" - return profile_options - # Modules used to override Huggingface WavLM to be NPU friendly class SliceConv1d(torch.nn.Module): diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index 0da911f3..08fb5b77 
100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -22,7 +22,6 @@ aggregated: - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) - - SA7255P ADP - SA8255 (Proxy) - SA8295P ADP - SA8650 (Proxy) @@ -37,29 +36,44 @@ aggregated: - SA8775P - QCS8450 Proxy - QCS8550 Proxy - - SA7255P - SA8255P Proxy - SA8295P - SA8650P Proxy models: - name: HuggingFace-WavLM-Base-Plus universal_assets: - torchscript_onnx_tflite: mm5dw614n + torchscript_onnx_tflite: mq8dkrkpm + torchscript_onnx: mn0jx1xzm performance_metrics: - torchscript_onnx_tflite: - inference_time: 768657.0 - throughput: 1.3009703938167478 + inference_time: 165915.0 + throughput: 6.02718259349667 estimated_peak_memory_range: - min: 65585152 - max: 107868072 - primary_compute_unit: CPU - precision: fp32 + min: 135168 + max: 55395064 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: j5mno6lqp + job_id: jgo2l7y1p + job_status: Passed + torchscript_onnx: + inference_time: 180343.0 + throughput: 5.544989270445762 + estimated_peak_memory_range: + min: 126976 + max: 238126416 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 686 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 686 + job_id: jpxkl8vj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -68,21 +82,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:10:47Z' + timestamp: '2024-12-11T23:38:04Z' - torchscript_onnx_tflite: - inference_time: 818517.0 - throughput: 1.2217217235561386 + inference_time: 123043.0 + throughput: 8.127240070544444 estimated_peak_memory_range: - min: 33136640 - max: 56495632 - primary_compute_unit: CPU - precision: fp32 + min: 1359872 + max: 205352976 + primary_compute_unit: NPU + precision: fp16 layer_info: 
- layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jgn6omwm5 + job_id: jpv6ly3z5 + job_status: Passed + torchscript_onnx: + inference_time: 131349.0 + throughput: 7.61330501183869 + estimated_peak_memory_range: + min: 3129344 + max: 1745986848 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 686 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 686 + job_id: j5mn01ryp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -91,21 +120,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:10:49Z' + timestamp: '2024-12-11T23:38:05Z' - torchscript_onnx_tflite: - inference_time: 543868.0 - throughput: 1.838681444762332 + inference_time: 116806.0 + throughput: 8.561204047737274 estimated_peak_memory_range: - min: 63725568 - max: 79596416 - primary_compute_unit: CPU - precision: fp32 + min: 1376256 + max: 216130208 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jprvo27eg + job_id: jgjvr6x1g + job_status: Passed + torchscript_onnx: + inference_time: 114021.0 + throughput: 8.770314240359232 + estimated_peak_memory_range: + min: 2838528 + max: 1304495536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 686 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 686 + job_id: jgn6zdyv5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -114,21 +158,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:10:50Z' + timestamp: '2024-12-11T23:38:06Z' - torchscript_onnx_tflite: - inference_time: 953526.0 - throughput: 1.0487391009788931 + inference_time: 163787.0 + throughput: 6.105490667757515 estimated_peak_memory_range: - min: 66052096 - max: 100841784 - 
primary_compute_unit: CPU - precision: fp32 + min: 114688 + max: 51620064 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jp2k49zmp + job_id: jped70985 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -137,44 +181,29 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:10:51Z' - - torchscript_onnx_tflite: - inference_time: 1567307.0 - throughput: 0.6380370916482859 - estimated_peak_memory_range: - min: 65671168 - max: 87351248 - primary_compute_unit: CPU - precision: fp32 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 0 - layers_on_cpu: 871 - total_layers: 871 - job_id: jpy1qjy4p - job_status: Passed - reference_device_info: + timestamp: '2024-12-11T23:37:49Z' + - reference_device_info: name: SA7255P ADP os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:10:52Z' + timestamp: '2024-12-11T23:37:50Z' - torchscript_onnx_tflite: - inference_time: 924443.0 - throughput: 1.081732459437737 + inference_time: 164616.0 + throughput: 6.074743645818146 estimated_peak_memory_range: - min: 45674496 - max: 48409568 - primary_compute_unit: CPU - precision: fp32 + min: 131072 + max: 49430192 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jp0zd2xe5 + job_id: j5wel0o45 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -183,21 +212,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:10:53Z' + timestamp: '2024-12-11T23:37:51Z' - torchscript_onnx_tflite: - inference_time: 985456.0 - throughput: 1.014758649802731 + inference_time: 270492.0 + throughput: 3.696967008266418 estimated_peak_memory_range: - min: 65949696 - max: 
85399136 - primary_compute_unit: CPU - precision: fp32 + min: 1417216 + max: 205612160 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jp8q6mk8p + job_id: jg9lz7vmg job_status: Passed reference_device_info: name: SA8295P ADP @@ -206,21 +235,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:10:54Z' + timestamp: '2024-12-11T23:37:52Z' - torchscript_onnx_tflite: - inference_time: 726672.0 - throughput: 1.3761366889050355 + inference_time: 163370.0 + throughput: 6.121074860745547 estimated_peak_memory_range: - min: 65933312 - max: 68806080 - primary_compute_unit: CPU - precision: fp32 + min: 45056 + max: 59313088 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jgkeoqkog + job_id: jp14nk0np job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -229,21 +258,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:10:55Z' + timestamp: '2024-12-11T23:37:53Z' - torchscript_onnx_tflite: - inference_time: 1317403.0 - throughput: 0.7590691686598557 + inference_time: 224491.0 + throughput: 4.454521562111621 estimated_peak_memory_range: - min: 65642496 - max: 87458400 - primary_compute_unit: CPU - precision: fp32 + min: 1351680 + max: 213801024 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: j5q6zrdmp + job_id: jgdxdyw6p job_status: Passed reference_device_info: name: SA8775P ADP @@ -252,21 +281,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:10:56Z' + timestamp: '2024-12-11T23:37:55Z' - torchscript_onnx_tflite: - 
inference_time: 1062167.0 - throughput: 0.941471538844645 + inference_time: 249724.0 + throughput: 4.00442088065224 estimated_peak_memory_range: - min: 66699264 - max: 93358416 - primary_compute_unit: CPU - precision: fp32 + min: 1404928 + max: 200416224 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 + layers_on_npu: 871 layers_on_gpu: 0 - layers_on_cpu: 871 + layers_on_cpu: 0 total_layers: 871 - job_id: jglvo2ql5 + job_id: j5wel0oz5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -275,4 +304,27 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:10:57Z' + timestamp: '2024-12-11T23:37:56Z' + - torchscript_onnx: + inference_time: 186000.0 + throughput: 5.376344086021505 + estimated_peak_memory_range: + min: 216260608 + max: 216260608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 686 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 686 + job_id: jprvlmqvg + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T23:38:07Z' diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index b1478061..28871bfb 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Inception-v3 universal_assets: - torchscript_onnx_tflite: mm6v4zg4q - torchscript_onnx: mnzv439xq + torchscript_onnx_tflite: mn0jxe28m + torchscript_onnx: mqkvk28zm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1337.0 + throughput: 747.9431563201197 estimated_peak_memory_range: - min: 16384 - max: 200662768 + min: 20480 + max: 200656704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jg9lkyovg + job_id: jp8qedlqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1407.0 - throughput: 710.7320540156361 + inference_time: 1416.0 + throughput: 706.2146892655368 estimated_peak_memory_range: - min: 110592 - max: 148064064 + min: 630784 + max: 148521336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpy1qj24p + job_id: jgz3lq145 job_status: Passed torchscript_onnx: - inference_time: 1745.0 - throughput: 573.0659025787966 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: - min: 20480 - max: 590094280 + min: 536576 + max: 2526552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jped8wx05 + job_id: jp2krq26p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:10:16Z' + timestamp: '2024-12-11T23:37:16Z' - torchscript_onnx_tflite: - inference_time: 993.0 - throughput: 1007.0493454179255 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: min: 16384 - max: 20324272 + max: 22793824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp147wolp + job_id: jgke2wjvg job_status: Passed torchscript_onnx_qnn: - inference_time: 1052.0 - throughput: 950.5703422053232 + inference_time: 1051.0 + throughput: 951.4747859181732 estimated_peak_memory_range: min: 0 - max: 20079008 + max: 23364160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp0zd29e5 + job_id: j5wel0j45 job_status: Passed torchscript_onnx: - 
inference_time: 1283.0 - throughput: 779.423226812159 + inference_time: 1289.0 + throughput: 775.7951900698216 estimated_peak_memory_range: min: 0 - max: 60053024 + max: 61424400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgz38jy65 + job_id: jpy1ok90p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:10:17Z' + timestamp: '2024-12-11T23:37:17Z' - torchscript_onnx_tflite: - inference_time: 845.0 - throughput: 1183.4319526627219 + inference_time: 862.0 + throughput: 1160.092807424594 estimated_peak_memory_range: - min: 16384 - max: 16464160 + min: 12288 + max: 18587648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgdx8q6lp + job_id: j5q6lxjep job_status: Passed torchscript_onnx_qnn: - inference_time: 1003.0 - throughput: 997.0089730807578 + inference_time: 1041.0 + throughput: 960.6147934678194 estimated_peak_memory_range: - min: 0 - max: 15867200 + min: 614400 + max: 18500512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp8q6mr8p + job_id: jg9lz76mg job_status: Passed torchscript_onnx: - inference_time: 1261.0 - throughput: 793.0214115781126 + inference_time: 1250.0 + throughput: 800.0 estimated_peak_memory_range: min: 0 - max: 25993360 + max: 28648528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5we83zj5 + job_id: jp0zm8y05 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:10:18Z' + 
timestamp: '2024-12-11T23:37:18Z' - torchscript_onnx_tflite: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1337.0 + throughput: 747.9431563201197 estimated_peak_memory_range: - min: 20480 - max: 200709040 + min: 16384 + max: 200684784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j57yklnr5 + job_id: jglvy9j25 job_status: Passed torchscript_onnx_qnn: - inference_time: 1450.0 - throughput: 689.6551724137931 + inference_time: 1453.0 + throughput: 688.2312456985547 estimated_peak_memory_range: - min: 651264 - max: 1853304 + min: 634880 + max: 2192024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgkeoq0og + job_id: jp14nkrnp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:10:07Z' + timestamp: '2024-12-11T23:37:07Z' - torchscript_onnx_tflite: - inference_time: 58004.0 - throughput: 17.24019033170126 + inference_time: 58029.0 + throughput: 17.232762928880387 estimated_peak_memory_range: - min: 94208 - max: 15035744 + min: 159744 + max: 17171392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp4lmd4l5 + job_id: j56y89knp job_status: Passed torchscript_onnx_qnn: - inference_time: 58269.0 - throughput: 17.1617841390791 + inference_time: 58238.0 + throughput: 17.170919331020983 estimated_peak_memory_range: - min: 704512 - max: 6536096 + min: 643072 + max: 11191456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jglvo28l5 + job_id: j57ye1qn5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:10:10Z' + timestamp: '2024-12-11T23:37:09Z' - torchscript_onnx_tflite: - inference_time: 1334.0 - throughput: 749.6251874062968 + inference_time: 1337.0 + throughput: 747.9431563201197 estimated_peak_memory_range: - min: 24576 - max: 200570504 + min: 20480 + max: 200790240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jpxk36r95 + job_id: jp3jzlymg job_status: Passed torchscript_onnx_qnn: inference_time: 1458.0 throughput: 685.8710562414266 estimated_peak_memory_range: - min: 634880 - max: 1957856 + min: 626688 + max: 2050840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j56yrzm7p + job_id: jp4ly6z25 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:10:11Z' + timestamp: '2024-12-11T23:37:11Z' - torchscript_onnx_tflite: - inference_time: 2558.0 - throughput: 390.93041438623925 + inference_time: 2572.0 + throughput: 388.8024883359254 estimated_peak_memory_range: min: 16384 - max: 14868640 + max: 17078736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j5mno6kqp + job_id: jgo2l7j1p job_status: Passed torchscript_onnx_qnn: - inference_time: 2842.0 - throughput: 351.8648838845883 + inference_time: 2789.0 + throughput: 358.55145213338113 estimated_peak_memory_range: min: 0 - max: 6017088 + max: 6159984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp3jx17zg + job_id: jpxkl8w85 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:10:12Z' + timestamp: '2024-12-11T23:37:12Z' - torchscript_onnx_tflite: - inference_time: 1335.0 - throughput: 749.0636704119851 + inference_time: 1333.0 + throughput: 750.1875468867216 estimated_peak_memory_range: - min: 16384 - max: 200792600 + min: 20480 + max: 200939840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jgn6omqm5 + job_id: jpv6lyjz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1471.0 - throughput: 679.8096532970768 + inference_time: 1467.0 + throughput: 681.6632583503749 estimated_peak_memory_range: min: 630784 - max: 1745368 + max: 1975128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgo2onwdp + job_id: j5mn01j7p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:10:13Z' - - torchscript_onnx_qnn: - inference_time: 2864.0 - throughput: 349.16201117318434 + timestamp: '2024-12-11T23:37:13Z' + - torchscript_onnx_tflite: + inference_time: 2590.0 + throughput: 386.1003861003861 estimated_peak_memory_range: - min: 622592 - max: 6405408 + min: 40960 + max: 17257456 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 129 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 129 + job_id: jgjvr6j1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2861.0 + throughput: 349.5281370150297 + estimated_peak_memory_range: + min: 618496 + max: 6654400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpv6ermm5 + job_id: jgn6zdjj5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 
+433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:10:14Z' + timestamp: '2024-12-11T23:37:14Z' - torchscript_onnx_tflite: - inference_time: 2155.0 - throughput: 464.0371229698376 + inference_time: 2125.0 + throughput: 470.5882352941176 estimated_peak_memory_range: - min: 20480 - max: 22695040 + min: 16384 + max: 23616976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jp2k49dmp + job_id: jped70j85 job_status: Passed torchscript_onnx_qnn: - inference_time: 2189.0 - throughput: 456.82960255824577 + inference_time: 2205.0 + throughput: 453.51473922902494 estimated_peak_memory_range: - min: 618496 - max: 17878064 + min: 638976 + max: 24912592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgjvo2y8g + job_id: jprvlmzkg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:10:15Z' + timestamp: '2024-12-11T23:37:15Z' - torchscript_onnx_qnn: - inference_time: 1487.0 - throughput: 672.4949562878278 + inference_time: 1491.0 + throughput: 670.690811535882 estimated_peak_memory_range: min: 602112 max: 602112 @@ -470,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5q6zr1mp + job_id: jgdxdyj6p job_status: Passed torchscript_onnx: - inference_time: 1656.0 - throughput: 603.864734299517 + inference_time: 1688.0 + throughput: 592.4170616113744 estimated_peak_memory_range: - min: 51060736 - max: 51060736 + min: 50384896 + max: 50384896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jg9lky2vg + job_id: jp8qedoqp job_status: Passed reference_device_info: name: Snapdragon X Elite 
CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:10:19Z' + timestamp: '2024-12-11T23:37:19Z' diff --git a/qai_hub_models/models/inception_v3_quantized/evaluate.py b/qai_hub_models/models/inception_v3_quantized/evaluate.py index 6547e871..ae884138 100644 --- a/qai_hub_models/models/inception_v3_quantized/evaluate.py +++ b/qai_hub_models/models/inception_v3_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index 915552a6..dc3816cd 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/inception_v3_quantized/info.yaml b/qai_hub_models/models/inception_v3_quantized/info.yaml index 395563e4..5391209c 100644 --- a/qai_hub_models/models/inception_v3_quantized/info.yaml +++ b/qai_hub_models/models/inception_v3_quantized/info.yaml @@ -12,6 +12,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: http://arxiv.org/abs/1512.00567 research_paper_title: Rethinking the Inception Architecture for Computer Vision license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index 8f7e18df..7bae69dd 100644 --- 
a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: Inception-v3-Quantized universal_assets: - torchscript_onnx_tflite: mqy3w6r5m + torchscript_onnx_tflite: mqp3z0z0m performance_metrics: - torchscript_onnx_tflite: - inference_time: 639.0 - throughput: 1564.9452269170579 + inference_time: 644.0 + throughput: 1552.7950310559006 estimated_peak_memory_range: min: 16384 - max: 241383760 + max: 38744792 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j57ykv695 + job_id: j57yew195 job_status: Passed torchscript_onnx_qnn: - inference_time: 651.0 - throughput: 1536.0983102918588 + inference_time: 644.0 + throughput: 1552.7950310559006 estimated_peak_memory_range: - min: 28672 - max: 28317976 + min: 24576 + max: 28655928 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jglvoeom5 + job_id: jp3jz3dng job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:01:37Z' + timestamp: '2024-12-12T01:30:11Z' - torchscript_onnx_tflite: - inference_time: 454.0 - throughput: 2202.643171806167 + inference_time: 482.0 + throughput: 2074.688796680498 estimated_peak_memory_range: - min: 12288 - max: 16205200 + min: 16384 + max: 26237760 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp4lmj815 + job_id: jp4lyo615 job_status: Passed torchscript_onnx_qnn: - inference_time: 503.0 - throughput: 1988.0715705765408 + inference_time: 495.0 + throughput: 2020.20202020202 estimated_peak_memory_range: min: 167936 - max: 18342048 + max: 24592080 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j56yrqryp + job_id: jgo2l0xkp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:01:39Z' + timestamp: '2024-12-12T01:30:13Z' - torchscript_onnx_tflite: - inference_time: 450.0 - throughput: 2222.222222222222 + inference_time: 448.0 + throughput: 2232.1428571428573 estimated_peak_memory_range: - min: 12288 - max: 16923376 + min: 8192 + max: 20765328 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jpxk3eml5 + job_id: jpxklj8l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 486.0 - throughput: 2057.61316872428 + inference_time: 492.0 + throughput: 2032.520325203252 estimated_peak_memory_range: min: 0 - max: 16177136 + max: 16355472 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp3jxqxng + job_id: jpv6lo8r5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:01:41Z' + timestamp: '2024-12-12T01:30:15Z' - torchscript_onnx_tflite: - inference_time: 2443.0 - throughput: 409.3327875562833 + inference_time: 2649.0 + throughput: 377.5009437523594 estimated_peak_memory_range: - min: 16384 - max: 26363680 + min: 12288 + max: 23389344 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j5mnov49p + job_id: j5mn0219p job_status: Passed torchscript_onnx_qnn: - inference_time: 2856.0 - throughput: 350.14005602240894 + inference_time: 2904.0 + throughput: 344.3526170798898 
estimated_peak_memory_range: - min: 12288 - max: 8281872 + min: 16384 + max: 7126672 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgo2oeokp + job_id: jgjvrm9eg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:01:21Z' + timestamp: '2024-12-12T01:30:16Z' - torchscript_onnx_tflite: - inference_time: 7968.0 - throughput: 125.50200803212851 + inference_time: 7674.0 + throughput: 130.31013812874642 estimated_peak_memory_range: - min: 12288 - max: 7407272 + min: 40960 + max: 2824176 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgn6orxq5 + job_id: jgn6zydq5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T04:01:01Z' + timestamp: '2024-12-12T01:29:57Z' - torchscript_onnx_tflite: - inference_time: 634.0 - throughput: 1577.2870662460568 + inference_time: 638.0 + throughput: 1567.398119122257 estimated_peak_memory_range: - min: 20480 - max: 287215528 + min: 16384 + max: 28756952 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jprvo197g + job_id: jprvlqm7g job_status: Passed torchscript_onnx_qnn: inference_time: 648.0 throughput: 1543.20987654321 estimated_peak_memory_range: min: 176128 - max: 1390992 + max: 1448984 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpv6ezer5 + job_id: jped71qv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm 
chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:01:23Z' + timestamp: '2024-12-12T01:30:18Z' - torchscript_onnx_tflite: - inference_time: 8425.0 - throughput: 118.69436201780415 + inference_time: 8514.0 + throughput: 117.45360582569884 estimated_peak_memory_range: - min: 32768 - max: 17873024 + min: 16384 + max: 19673664 primary_compute_unit: NPU precision: int8 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp2k43jqp + job_id: jp2kr6qqp job_status: Passed torchscript_onnx_qnn: - inference_time: 8737.0 - throughput: 114.45576284765939 + inference_time: 8742.0 + throughput: 114.39029970258522 estimated_peak_memory_range: - min: 98304 - max: 5731040 + min: 94208 + max: 10592048 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jped848v5 + job_id: j5welvkm5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:01:26Z' + timestamp: '2024-12-12T01:30:22Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: min: 16384 - max: 6535408 + max: 15825872 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jpy1qvnlp + job_id: jpy1owklp job_status: Passed torchscript_onnx_qnn: - inference_time: 652.0 - throughput: 1533.7423312883436 + inference_time: 655.0 + throughput: 1526.7175572519084 estimated_peak_memory_range: - min: 172032 - max: 1994024 + min: 180224 + max: 1775440 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgz38v8x5 + job_id: jg9lz1r8g job_status: Passed reference_device_info: name: SA8255 
(Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:01:28Z' + timestamp: '2024-12-12T01:30:24Z' - torchscript_onnx_tflite: - inference_time: 1130.0 - throughput: 884.9557522123894 + inference_time: 1150.0 + throughput: 869.5652173913044 estimated_peak_memory_range: min: 16384 - max: 16279664 + max: 15915632 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp0zdekn5 + job_id: jp8qe9dop job_status: Passed torchscript_onnx_qnn: - inference_time: 1440.0 - throughput: 694.4444444444445 + inference_time: 1303.0 + throughput: 767.4597083653108 estimated_peak_memory_range: min: 0 - max: 5954848 + max: 5959136 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5we8m8m5 + job_id: jp14nl97p job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:01:30Z' + timestamp: '2024-12-12T01:30:26Z' - torchscript_onnx_tflite: - inference_time: 642.0 - throughput: 1557.632398753894 + inference_time: 644.0 + throughput: 1552.7950310559006 estimated_peak_memory_range: - min: 20480 - max: 230375248 + min: 24576 + max: 8617104 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jp8q6w8op + job_id: jgke2nwng job_status: Passed torchscript_onnx_qnn: - inference_time: 652.0 - throughput: 1533.7423312883436 + inference_time: 653.0 + throughput: 1531.3935681470139 estimated_peak_memory_range: min: 176128 - max: 1395000 + max: 1356992 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jg9lk9k8g + job_id: jgdxd9kzp job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:01:32Z' + timestamp: '2024-12-12T01:30:28Z' - torchscript_onnx_tflite: - inference_time: 956.0 - throughput: 1046.0251046025105 + inference_time: 958.0 + throughput: 1043.8413361169103 estimated_peak_memory_range: - min: 16384 - max: 18172192 + min: 98304 + max: 21074256 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jgkeordng + job_id: j5q6lkxop job_status: Passed torchscript_onnx_qnn: - inference_time: 1151.0 - throughput: 868.8097306689835 + inference_time: 1145.0 + throughput: 873.3624454148471 estimated_peak_memory_range: - min: 0 - max: 5892800 + min: 163840 + max: 6149296 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp147q77p + job_id: j57yewm95 job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:01:34Z' + timestamp: '2024-12-12T01:30:30Z' - torchscript_onnx_tflite: - inference_time: 770.0 - throughput: 1298.7012987012988 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 0 - max: 22915552 + min: 16384 + max: 20243360 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j5q6z9wop + job_id: j56y8jxyp job_status: Passed torchscript_onnx_qnn: - inference_time: 761.0 - throughput: 1314.060446780552 + inference_time: 765.0 + throughput: 1307.18954248366 estimated_peak_memory_range: - min: 192512 - max: 22474208 + min: 172032 + max: 23539872 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 219 - job_id: jgdx878zp + job_id: jp4lyo715 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:01:35Z' + timestamp: '2024-12-12T01:30:31Z' - torchscript_onnx_qnn: - inference_time: 724.0 - throughput: 1381.2154696132598 + inference_time: 726.0 + throughput: 1377.4104683195592 estimated_peak_memory_range: - min: 479232 - max: 479232 + min: 446464 + max: 446464 primary_compute_unit: NPU precision: int8 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgjvokoeg + job_id: jgz3l96x5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:01:43Z' + timestamp: '2024-12-12T01:30:20Z' diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index 9bd4f09a..87df4f28 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -206,7 +206,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_onnx=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index d3f70b2c..0e9943cf 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -44,14 +44,15 @@ aggregated: models: - name: LaMa-Dilated universal_assets: - torchscript_onnx_tflite: mmdy8doom + torchscript_onnx_tflite: mnw8erpxn + torchscript_onnx: mmr365grm performance_metrics: - torchscript_onnx_tflite: - inference_time: 75108.0 - throughput: 13.314160941577462 + inference_time: 75011.0 + throughput: 13.331378064550533 
estimated_peak_memory_range: - min: 3244032 - max: 336901656 + min: 3248128 + max: 337863680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -59,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgz38jlz5 + job_id: jpxkl8885 job_status: Passed torchscript_onnx_qnn: - inference_time: 70815.0 - throughput: 14.121301984042928 + inference_time: 70796.0 + throughput: 14.125091813096786 estimated_peak_memory_range: - min: 3211264 - max: 38987080 + min: 3256320 + max: 39862048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -74,7 +75,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jprvo2xvg + job_id: jglvy9d25 + job_status: Passed + torchscript_onnx: + inference_time: 73503.0 + throughput: 13.604886875365631 + estimated_peak_memory_range: + min: 270336 + max: 1307230328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 339 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 339 + job_id: j57ye1mn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:09:16Z' + timestamp: '2024-12-11T23:36:28Z' - torchscript_onnx_tflite: - inference_time: 52398.0 - throughput: 19.084697889232412 + inference_time: 52544.0 + throughput: 19.03166869671133 estimated_peak_memory_range: - min: 2404352 - max: 95474480 + min: 3006464 + max: 96728688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -97,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: j5we83lz5 + job_id: j5mn0117p job_status: Passed torchscript_onnx_qnn: - inference_time: 49746.0 - throughput: 20.102118763317655 + inference_time: 48034.0 + throughput: 20.818586834325686 estimated_peak_memory_range: - min: 4214784 - max: 98112080 + min: 4235264 + max: 93478448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -112,7 +128,22 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jp2k49oxp + job_id: j56y89xnp + job_status: Passed + torchscript_onnx: + inference_time: 50628.0 + throughput: 19.751915935845776 + estimated_peak_memory_range: + min: 0 + max: 279600752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 339 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 339 + job_id: jp4ly6725 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:09:17Z' + timestamp: '2024-12-11T23:36:29Z' - torchscript_onnx_tflite: - inference_time: 49091.0 - throughput: 20.370332647532134 + inference_time: 49125.0 + throughput: 20.356234096692113 estimated_peak_memory_range: - min: 2850816 - max: 95931360 + min: 2519040 + max: 96117136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -135,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jg9lkyzqg + job_id: jgn6zddj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 38188.0 - throughput: 26.186236514088197 + inference_time: 45679.0 + throughput: 21.891897808621028 estimated_peak_memory_range: - min: 1265664 - max: 91984240 + min: 4100096 + max: 95784032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -150,7 +181,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jpy1qj8rp + job_id: jp3jzldmg + job_status: Passed + torchscript_onnx: + inference_time: 47643.0 + throughput: 20.98944231051781 + estimated_peak_memory_range: + min: 0 + max: 171753392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 339 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 339 + job_id: jpxkl8q85 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: 
'2024-11-26T02:09:18Z' + timestamp: '2024-12-11T23:36:30Z' - torchscript_onnx_tflite: - inference_time: 74849.0 - throughput: 13.360231933626368 + inference_time: 75063.0 + throughput: 13.322142733437246 estimated_peak_memory_range: - min: 3268608 - max: 340046624 + min: 3256320 + max: 337030112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -173,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jp147wnkp + job_id: jprvlmmkg job_status: Passed torchscript_onnx_qnn: - inference_time: 66486.0 - throughput: 15.040760460848901 + inference_time: 66641.0 + throughput: 15.005777224231329 estimated_peak_memory_range: - min: 4403200 - max: 5641184 + min: 4354048 + max: 5585472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -188,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jp0zd2o25 + job_id: jgo2l7x1p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -197,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:09:19Z' + timestamp: '2024-12-11T23:36:19Z' - torchscript_onnx_tflite: - inference_time: 3020557.0 - throughput: 0.3310647671936004 + inference_time: 3020421.0 + throughput: 0.3310796739924666 estimated_peak_memory_range: - min: 3387392 - max: 95435696 + min: 339968 + max: 92824256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -211,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgdx8qdkp + job_id: jp2krqq6p job_status: Passed torchscript_onnx_qnn: - inference_time: 3006666.0 - throughput: 0.3325943087792259 + inference_time: 3006571.0 + throughput: 0.3326048179138294 estimated_peak_memory_range: - min: 3260416 - max: 9041312 + min: 2949120 + max: 13226128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -226,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jgkeoq6yg + job_id: jgjvr691g job_status: Passed 
reference_device_info: name: SA7255P ADP @@ -235,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:09:22Z' + timestamp: '2024-12-11T23:36:21Z' - torchscript_onnx_tflite: - inference_time: 75137.0 - throughput: 13.309022186139984 + inference_time: 74930.0 + throughput: 13.345789403443213 estimated_peak_memory_range: - min: 3244032 - max: 336797240 + min: 3256320 + max: 350287056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -249,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: j57ykleq5 + job_id: jpy1okk0p job_status: Passed torchscript_onnx_qnn: - inference_time: 66629.0 - throughput: 15.008479791081962 + inference_time: 66578.0 + throughput: 15.019976568836553 estimated_peak_memory_range: - min: 4308992 - max: 5864896 + min: 4370432 + max: 5902736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -264,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: j5q6zr47p + job_id: jped70q85 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -273,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:09:23Z' + timestamp: '2024-12-11T23:36:22Z' - torchscript_onnx_tflite: - inference_time: 115862.0 - throughput: 8.630957518427094 + inference_time: 115892.0 + throughput: 8.628723294101405 estimated_peak_memory_range: - min: 3235840 - max: 46366688 + min: 3219456 + max: 46677712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -287,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jp4lmdyq5 + job_id: jp0zm8w05 job_status: Passed torchscript_onnx_qnn: - inference_time: 103924.0 - throughput: 9.622416381201647 + inference_time: 103929.0 + throughput: 9.621953448989213 estimated_peak_memory_range: - min: 1282048 - max: 7234128 + min: 1331200 + max: 7448768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -302,7 +348,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jglvo2we5 + job_id: j5wel0k45 job_status: Passed reference_device_info: name: SA8295P ADP @@ -311,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:09:24Z' + timestamp: '2024-12-11T23:36:24Z' - torchscript_onnx_tflite: - inference_time: 74964.0 - throughput: 13.339736406808601 + inference_time: 75690.0 + throughput: 13.21178491214163 estimated_peak_memory_range: - min: 3268608 - max: 327493256 + min: 3235840 + max: 328110424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -325,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jpxk36lj5 + job_id: jp8qednqp job_status: Passed torchscript_onnx_qnn: - inference_time: 66611.0 - throughput: 15.012535467115041 + inference_time: 66800.0 + throughput: 14.970059880239521 estimated_peak_memory_range: - min: 4386816 - max: 5693440 + min: 4374528 + max: 5514856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -340,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: j56yrzovp + job_id: jg9lz7rmg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -349,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:09:25Z' + timestamp: '2024-12-11T23:36:25Z' - torchscript_onnx_tflite: - inference_time: 129835.0 - throughput: 7.7020834135633685 + inference_time: 129854.0 + throughput: 7.700956458792182 estimated_peak_memory_range: - min: 3256320 - max: 95011120 + min: 3264512 + max: 95212048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -363,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: j5mno60yp + job_id: jgke2w1vg job_status: Passed torchscript_onnx_qnn: - inference_time: 119102.0 - throughput: 8.396164631996104 + inference_time: 119068.0 + throughput: 8.398562166157154 estimated_peak_memory_range: - min: 1605632 - 
max: 7085552 + min: 2265088 + max: 8446800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -378,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jp3jx1oxg + job_id: jp14nk9np job_status: Passed reference_device_info: name: SA8775P ADP @@ -387,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:09:26Z' + timestamp: '2024-12-11T23:36:26Z' - torchscript_onnx_tflite: - inference_time: 108457.0 - throughput: 9.220243967655383 + inference_time: 107830.0 + throughput: 9.273856997125105 estimated_peak_memory_range: - min: 3203072 - max: 52718496 + min: 3252224 + max: 49911248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -401,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 343 - job_id: jgn6om1v5 + job_id: j5q6lxnep job_status: Passed torchscript_onnx_qnn: - inference_time: 99395.0 - throughput: 10.060868252930227 + inference_time: 99617.0 + throughput: 10.038447252978909 estimated_peak_memory_range: min: 4292608 - max: 51574224 + max: 49763552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -416,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jgo2ond4p + job_id: jgdxdyk6p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -425,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:09:27Z' + timestamp: '2024-12-11T23:36:27Z' - torchscript_onnx_qnn: - inference_time: 69456.0 - throughput: 14.397604238654688 + inference_time: 69489.0 + throughput: 14.390766883967247 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -439,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 332 - job_id: jp8q6mjzp + job_id: jpv6ly8z5 + job_status: Passed + torchscript_onnx: + inference_time: 78520.0 + throughput: 12.735608762098828 + estimated_peak_memory_range: + min: 94838784 + max: 94838784 + primary_compute_unit: NPU + 
precision: fp16 + layer_info: + layers_on_npu: 339 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 339 + job_id: j5mn0177p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -448,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:09:20Z' + timestamp: '2024-12-11T23:36:31Z' diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 05e08cdb..b2026c6b 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: LiteHRNet universal_assets: - torchscript_onnx_tflite: mn1z837pm - torchscript_onnx: mqy3w2j5m + torchscript_onnx_tflite: mn7xlv3vq + torchscript_onnx: mmr365xrm performance_metrics: - torchscript_onnx_tflite: - inference_time: 7877.0 - throughput: 126.95188523549575 + inference_time: 7839.0 + throughput: 127.56729174639622 estimated_peak_memory_range: - min: 36864 - max: 15618728 + min: 16384 + max: 17058968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jg9lkymqg + job_id: jpxkl8z85 job_status: Passed torchscript_onnx: - inference_time: 7308.0 - throughput: 136.83634373289544 + inference_time: 7261.0 + throughput: 137.72207684891887 estimated_peak_memory_range: - min: 0 - max: 6116712 + min: 1040384 + max: 73902024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +75,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: j5q6zrl7p + job_id: jpv6lyyz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +84,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T02:08:32Z' + timestamp: '2024-12-11T23:35:33Z' - torchscript_onnx_tflite: - inference_time: 4774.0 - throughput: 209.46795140343528 + inference_time: 4790.0 + 
throughput: 208.76826722338205 estimated_peak_memory_range: - min: 16384 - max: 39504336 + min: 225280 + max: 41470560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +98,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp147wjkp + job_id: j5mn01l7p job_status: Passed torchscript_onnx: - inference_time: 4737.0 - throughput: 211.10407430863415 + inference_time: 4545.0 + throughput: 220.02200220022002 estimated_peak_memory_range: - min: 1060864 - max: 116891072 + min: 643072 + max: 116684800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +113,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jglvo2ye5 + job_id: jgjvr661g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +122,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T02:08:33Z' + timestamp: '2024-12-11T23:35:34Z' - torchscript_onnx_tflite: - inference_time: 4296.0 - throughput: 232.77467411545624 + inference_time: 5262.0 + throughput: 190.04180919802357 estimated_peak_memory_range: - min: 12288 - max: 32329520 + min: 221184 + max: 35096448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +136,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jgdx8q3kp + job_id: jgn6zdwj5 job_status: Passed torchscript_onnx: - inference_time: 4942.0 - throughput: 202.34722784297855 + inference_time: 4935.0 + throughput: 202.63424518743668 estimated_peak_memory_range: - min: 921600 - max: 83114336 + min: 0 + max: 81848112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +151,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: j56yrz8vp + job_id: jped70085 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +160,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T02:08:34Z' + timestamp: 
'2024-12-11T23:35:35Z' - torchscript_onnx_tflite: - inference_time: 7832.0 - throughput: 127.68130745658836 + inference_time: 7834.0 + throughput: 127.64871074802144 estimated_peak_memory_range: - min: 24576 - max: 18770264 + min: 16384 + max: 16139368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +174,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: j57ykl4q5 + job_id: jprvlm7kg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +183,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T02:08:16Z' + timestamp: '2024-12-11T23:35:18Z' - torchscript_onnx_tflite: - inference_time: 28622.0 - throughput: 34.93815945775977 + inference_time: 28707.0 + throughput: 34.83470930435085 estimated_peak_memory_range: - min: 266240 - max: 31185744 + min: 262144 + max: 31851072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +197,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp4lmd1q5 + job_id: jp2krqz6p job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +206,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T02:08:17Z' + timestamp: '2024-12-11T23:35:19Z' - torchscript_onnx_tflite: - inference_time: 7875.0 - throughput: 126.98412698412699 + inference_time: 7888.0 + throughput: 126.77484787018255 estimated_peak_memory_range: - min: 16384 - max: 15994472 + min: 20480 + max: 16002488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +220,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jpxk364j5 + job_id: jpy1oky0p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +229,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T02:08:19Z' + timestamp: '2024-12-11T23:35:20Z' - torchscript_onnx_tflite: - inference_time: 9858.0 - throughput: 
101.44045445323594 + inference_time: 9903.0 + throughput: 100.97950116126427 estimated_peak_memory_range: min: 225280 - max: 27242288 + max: 26889744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +243,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: j5mno6myp + job_id: jp0zm8805 job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +252,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T02:08:20Z' + timestamp: '2024-12-11T23:35:21Z' - torchscript_onnx_tflite: - inference_time: 7892.0 - throughput: 126.71059300557526 + inference_time: 7840.0 + throughput: 127.55102040816327 estimated_peak_memory_range: - min: 20480 - max: 14413040 + min: 16384 + max: 8161480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +266,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jgn6omzv5 + job_id: jp8qeddqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +275,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T02:08:21Z' + timestamp: '2024-12-11T23:35:22Z' - torchscript_onnx_tflite: - inference_time: 10712.0 - throughput: 93.35324869305452 + inference_time: 10742.0 + throughput: 93.0925339787749 estimated_peak_memory_range: - min: 245760 - max: 31671216 + min: 225280 + max: 35759472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +289,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jprvo2lvg + job_id: jgke2wwvg job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +298,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T02:08:22Z' + timestamp: '2024-12-11T23:35:23Z' - torchscript_onnx_tflite: - inference_time: 8527.0 - throughput: 117.27453969743169 + inference_time: 8517.0 + throughput: 117.41223435481977 estimated_peak_memory_range: - min: 241664 - max: 
33498064 + min: 245760 + max: 38439808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +312,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 1235 - job_id: jp2k49rxp + job_id: j5q6lxxep job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +321,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T02:08:23Z' + timestamp: '2024-12-11T23:35:24Z' - torchscript_onnx: - inference_time: 8163.0 - throughput: 122.50398137939483 + inference_time: 8185.0 + throughput: 122.17470983506414 estimated_peak_memory_range: - min: 4653056 - max: 4653056 + min: 5570560 + max: 5570560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +335,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 4 total_layers: 1258 - job_id: jp3jx1zxg + job_id: jgz3lqq45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +344,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T02:08:36Z' + timestamp: '2024-12-11T23:35:36Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py index 04a6f0df..95e5a5c7 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py @@ -47,7 +47,7 @@ MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 8 +MODEL_ASSET_VERSION = 9 # Configs AIMET_ENCODINGS_PREFIX = "config" @@ -302,12 +302,10 @@ def _get_llama_model_with_split( model.eval() # Download quantization config and pre-computed encodings - model_encoding_tag = "tg" if is_token_generator else "pp" aimet_encodings = str( os.path.join( AIMET_ENCODINGS_PREFIX, - model_encoding_tag, - f"llama_{model_encoding_tag}_sha_{split_part - 1}.encodings", + f"llama_sha_{split_part - 1}.encodings", ) ) aimet_encodings = str( diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml 
b/qai_hub_models/models/mediapipe_face/perf.yaml index 4d23e515..6f0fa68d 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MediaPipeFaceDetector universal_assets: - torchscript_onnx_tflite: mmr62dw6m - torchscript_onnx: mq9pgxrln + torchscript_onnx_tflite: mqkvk2jzm + torchscript_onnx: mn7xlv8jq performance_metrics: - torchscript_onnx_tflite: - inference_time: 554.0 - throughput: 1805.0541516245487 + inference_time: 559.0 + throughput: 1788.9087656529516 estimated_peak_memory_range: - min: 20480 - max: 5610936 + min: 16384 + max: 38355680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jpy1qjq0p + job_id: jp8qedeop job_status: Passed torchscript_onnx_qnn: - inference_time: 630.0 - throughput: 1587.3015873015872 + inference_time: 632.0 + throughput: 1582.2784810126582 estimated_peak_memory_range: - min: 811008 - max: 5869224 + min: 806912 + max: 41117032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgdx8qzkp + job_id: jprvlmx7g job_status: Passed torchscript_onnx: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: - min: 253952 - max: 2565272 + min: 835584 + max: 2343088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jg9lkydqg + job_id: j5mn01k9p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:58:18Z' + timestamp: '2024-12-11T23:24:21Z' - torchscript_onnx_tflite: - inference_time: 398.0 - throughput: 2512.5628140703516 + inference_time: 
404.0 + throughput: 2475.2475247524753 estimated_peak_memory_range: min: 12288 - max: 17057328 + max: 20817600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jp8q6m4qp + job_id: j5q6lxlop job_status: Passed torchscript_onnx_qnn: - inference_time: 460.0 - throughput: 2173.913043478261 + inference_time: 454.0 + throughput: 2202.643171806167 estimated_peak_memory_range: min: 0 - max: 15379568 + max: 14792256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp4lmd9q5 + job_id: jpy1ok8lp job_status: Passed torchscript_onnx: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: min: 0 - max: 44111552 + max: 44226448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgdx8q2kp + job_id: jprvlmd7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:58:20Z' + timestamp: '2024-12-11T23:24:23Z' - torchscript_onnx_tflite: - inference_time: 414.0 - throughput: 2415.458937198068 + inference_time: 417.0 + throughput: 2398.0815347721823 estimated_peak_memory_range: min: 12288 - max: 11742976 + max: 13007824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: j5q6zrmep + job_id: j56y89oyp job_status: Passed torchscript_onnx_qnn: - inference_time: 501.0 - throughput: 1996.007984031936 + inference_time: 386.0 + throughput: 2590.6735751295337 estimated_peak_memory_range: min: 0 - max: 12754208 + max: 13048448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5mno6dyp + job_id: jp8qedjop job_status: Passed torchscript_onnx: inference_time: 759.0 throughput: 1317.5230566534915 estimated_peak_memory_range: min: 0 - max: 32840064 + max: 32494480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp4lmd3q5 + job_id: jpy1ok2lp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:58:22Z' + timestamp: '2024-12-11T23:24:25Z' - torchscript_onnx_tflite: - inference_time: 559.0 - throughput: 1788.9087656529516 + inference_time: 548.0 + throughput: 1824.8175182481752 estimated_peak_memory_range: - min: 16384 - max: 79294720 + min: 12288 + max: 79428552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: j56yrzdnp + job_id: jgo2l7dkp job_status: Passed torchscript_onnx_qnn: - inference_time: 601.0 - throughput: 1663.8935108153078 + inference_time: 603.0 + throughput: 1658.374792703151 estimated_peak_memory_range: - min: 811008 - max: 1997696 + min: 823296 + max: 2085600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jprvo2wvg + job_id: j5q6lx4op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:58:03Z' + timestamp: '2024-12-11T23:24:05Z' - torchscript_onnx_tflite: - inference_time: 18865.0 - throughput: 53.00821627352239 + inference_time: 18827.0 + throughput: 53.115206883730814 estimated_peak_memory_range: - min: 16384 - max: 12278544 + min: 102400 + max: 17024352 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jgo2on41p + job_id: jgjvr63eg job_status: Passed torchscript_onnx_qnn: - inference_time: 19081.0 - throughput: 52.4081547088727 + inference_time: 19157.0 + throughput: 52.20024012110456 estimated_peak_memory_range: - min: 778240 - max: 6291840 + min: 761856 + max: 11233584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp8q6m1zp + job_id: jgo2l7wkp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:58:06Z' + timestamp: '2024-12-11T23:24:10Z' - torchscript_onnx_tflite: - inference_time: 557.0 - throughput: 1795.3321364452424 + inference_time: 555.0 + throughput: 1801.8018018018017 estimated_peak_memory_range: - min: 24576 - max: 5254440 + min: 16384 + max: 5557368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jgjvo2w1g + job_id: jgz3lqzx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 609.0 - throughput: 1642.0361247947455 + inference_time: 607.0 + throughput: 1647.4464579901153 estimated_peak_memory_range: min: 823296 - max: 2127424 + max: 2096728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5q6zrv7p + job_id: jgjvr6yeg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:58:09Z' + timestamp: '2024-12-11T23:24:12Z' - torchscript_onnx_tflite: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1141.0 + throughput: 876.4241893076249 estimated_peak_memory_range: min: 16384 - max: 10398064 + 
max: 10643856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jgz38j445 + job_id: jg9lz7o8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1230.0 - throughput: 813.0081300813008 + inference_time: 1221.0 + throughput: 819.000819000819 estimated_peak_memory_range: min: 0 - max: 5974768 + max: 5888160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j56yrzwvp + job_id: jgz3lqyx5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:58:11Z' + timestamp: '2024-12-11T23:24:14Z' - torchscript_onnx_tflite: - inference_time: 560.0 - throughput: 1785.7142857142858 + inference_time: 553.0 + throughput: 1808.3182640144666 estimated_peak_memory_range: min: 16384 - max: 78705272 + max: 5268352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jg9lkyxmg + job_id: jgdxdy6zp job_status: Passed torchscript_onnx_qnn: - inference_time: 603.0 - throughput: 1658.374792703151 + inference_time: 617.0 + throughput: 1620.7455429497568 estimated_peak_memory_range: - min: 811008 - max: 2435944 + min: 819200 + max: 2069192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgo2on84p + job_id: jg9lz728g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:58:13Z' + timestamp: '2024-12-11T23:24:16Z' - torchscript_onnx_tflite: - inference_time: 1247.0 - throughput: 801.924619085806 + inference_time: 1273.0 + throughput: 785.5459544383347 
estimated_peak_memory_range: - min: 73728 - max: 11773296 + min: 0 + max: 17987568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jgdx8qz6p + job_id: jp4ly6e15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1446.0 - throughput: 691.5629322268327 + inference_time: 1468.0 + throughput: 681.1989100817439 estimated_peak_memory_range: - min: 806912 - max: 6691488 + min: 802816 + max: 6900784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgjvo2q7g + job_id: jgdxdy4zp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:58:15Z' + timestamp: '2024-12-11T23:24:18Z' - torchscript_onnx_tflite: - inference_time: 764.0 - throughput: 1308.9005235602094 + inference_time: 766.0 + throughput: 1305.4830287206266 estimated_peak_memory_range: - min: 16384 - max: 16231504 + min: 0 + max: 15145664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 111 - job_id: jg9lkyxqg + job_id: j5mn0199p job_status: Passed torchscript_onnx_qnn: - inference_time: 839.0 - throughput: 1191.8951132300358 + inference_time: 827.0 + throughput: 1209.1898428053205 estimated_peak_memory_range: - min: 819200 - max: 16083968 + min: 802816 + max: 15687824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgz38jnz5 + job_id: jp4ly6415 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:58:17Z' + timestamp: '2024-12-11T23:24:19Z' - torchscript_onnx_qnn: - inference_time: 743.0 - 
throughput: 1345.8950201884254 + inference_time: 748.0 + throughput: 1336.8983957219252 estimated_peak_memory_range: min: 786432 max: 786432 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpy1qjmrp + job_id: j56y89myp job_status: Passed torchscript_onnx: - inference_time: 1064.0 - throughput: 939.8496240601504 + inference_time: 1036.0 + throughput: 965.2509652509652 estimated_peak_memory_range: - min: 1925120 - max: 1925120 + min: 2207744 + max: 2207744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5mno68yp + job_id: jp8qedrop job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,18 +509,18 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:58:23Z' + timestamp: '2024-12-11T23:24:27Z' - name: MediaPipeFaceLandmarkDetector universal_assets: - torchscript_onnx_tflite: mq24zjejm - torchscript_onnx: mq24zj8jm + torchscript_onnx_tflite: mq9lpj10q + torchscript_onnx: mn0jxe53m performance_metrics: - torchscript_onnx_tflite: inference_time: 196.0 throughput: 5102.040816326531 estimated_peak_memory_range: min: 0 - max: 80409456 + max: 8037512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -528,14 +528,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jp0zd2v05 + job_id: jgke2w2ng job_status: Passed torchscript_onnx_qnn: - inference_time: 276.0 - throughput: 3623.1884057971015 + inference_time: 278.0 + throughput: 3597.122302158273 estimated_peak_memory_range: - min: 16384 - max: 7758896 + min: 471040 + max: 8044696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -543,14 +543,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j57ykl7q5 + job_id: jp2krqoqp job_status: Passed torchscript_onnx: - inference_time: 500.0 - throughput: 2000.0 + inference_time: 510.0 + throughput: 
1960.7843137254902 estimated_peak_memory_range: - min: 16384 - max: 11211112 + min: 458752 + max: 28638768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -558,7 +558,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp147w6kp + job_id: jgn6zdqq5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -567,13 +567,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:58:19Z' + timestamp: '2024-12-11T23:24:22Z' - torchscript_onnx_tflite: - inference_time: 149.0 - throughput: 6711.4093959731545 + inference_time: 147.0 + throughput: 6802.721088435374 estimated_peak_memory_range: - min: 16384 - max: 10603952 + min: 24576 + max: 11346096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -581,14 +581,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jgkeoq9vg + job_id: jglvy9wm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 206.0 - throughput: 4854.368932038835 + inference_time: 211.0 + throughput: 4739.336492890995 estimated_peak_memory_range: min: 0 - max: 10147232 + max: 11394128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -596,14 +596,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jpxk36dj5 + job_id: jp0zm8on5 job_status: Passed torchscript_onnx: - inference_time: 386.0 - throughput: 2590.6735751295337 + inference_time: 399.0 + throughput: 2506.2656641604012 estimated_peak_memory_range: min: 0 - max: 33514384 + max: 33286480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -611,7 +611,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j57ykl9q5 + job_id: jp2krqdqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -620,13 +620,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:58:20Z' + timestamp: '2024-12-11T23:24:23Z' - torchscript_onnx_tflite: - inference_time: 158.0 
- throughput: 6329.113924050633 + inference_time: 123.0 + throughput: 8130.081300813008 estimated_peak_memory_range: - min: 20480 - max: 10092352 + min: 421888 + max: 10153792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -634,14 +634,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jglvo2125 + job_id: jp3jzlong job_status: Passed torchscript_onnx_qnn: - inference_time: 191.0 - throughput: 5235.602094240838 + inference_time: 209.0 + throughput: 4784.688995215311 estimated_peak_memory_range: min: 0 - max: 9893248 + max: 10135696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -649,14 +649,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgn6omkv5 + job_id: jgke2w6ng job_status: Passed torchscript_onnx: - inference_time: 412.0 - throughput: 2427.1844660194174 + inference_time: 409.0 + throughput: 2444.987775061125 estimated_peak_memory_range: min: 0 - max: 19803712 + max: 19452336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -664,7 +664,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpxk36xj5 + job_id: jp0zm89n5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -673,13 +673,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:58:22Z' + timestamp: '2024-12-11T23:24:25Z' - torchscript_onnx_tflite: - inference_time: 188.0 - throughput: 5319.148936170212 + inference_time: 199.0 + throughput: 5025.125628140703 estimated_peak_memory_range: min: 16384 - max: 7420760 + max: 7328432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -687,14 +687,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jp3jx1wmg + job_id: jpv6ly2r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 280.0 - throughput: 3571.4285714285716 + inference_time: 275.0 + throughput: 3636.3636363636365 estimated_peak_memory_range: - min: 475136 - max: 1887136 + min: 471040 + 
max: 1720552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -702,7 +702,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp2k49exp + job_id: jglvy98m5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -711,13 +711,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:58:03Z' + timestamp: '2024-12-11T23:24:06Z' - torchscript_onnx_tflite: - inference_time: 3614.0 - throughput: 276.70171555063644 + inference_time: 3672.0 + throughput: 272.33115468409585 estimated_peak_memory_range: - min: 20480 - max: 10979472 + min: 16384 + max: 12993696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -725,14 +725,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jpv6er9z5 + job_id: jped706v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4040.0 - throughput: 247.52475247524754 + inference_time: 3963.0 + throughput: 252.33409033560434 estimated_peak_memory_range: - min: 393216 - max: 6093168 + min: 405504 + max: 10954656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -740,7 +740,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgkeoq8yg + job_id: jpv6lymr5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -749,13 +749,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:58:07Z' + timestamp: '2024-12-11T23:24:10Z' - torchscript_onnx_tflite: - inference_time: 192.0 - throughput: 5208.333333333333 + inference_time: 195.0 + throughput: 5128.205128205128 estimated_peak_memory_range: - min: 20480 - max: 3740240 + min: 28672 + max: 7546496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -763,14 +763,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jped8wl85 + job_id: j5wel0ym5 job_status: Passed torchscript_onnx_qnn: - inference_time: 277.0 - throughput: 3610.1083032490974 + inference_time: 287.0 + throughput: 
3484.320557491289 estimated_peak_memory_range: - min: 16384 - max: 1578992 + min: 475136 + max: 2021560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -778,7 +778,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jglvo2le5 + job_id: jped70xv5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -787,13 +787,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:58:09Z' + timestamp: '2024-12-11T23:24:12Z' - torchscript_onnx_tflite: - inference_time: 568.0 - throughput: 1760.5633802816901 + inference_time: 576.0 + throughput: 1736.111111111111 estimated_peak_memory_range: min: 20480 - max: 8160896 + max: 8148224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -801,14 +801,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: j5we83145 + job_id: jp14nko7p job_status: Passed torchscript_onnx_qnn: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 791.0 + throughput: 1264.2225031605562 estimated_peak_memory_range: min: 0 - max: 6042656 + max: 5968064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -816,7 +816,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp3jx16xg + job_id: j5wel0zm5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -825,13 +825,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:58:11Z' + timestamp: '2024-12-11T23:24:14Z' - torchscript_onnx_tflite: - inference_time: 199.0 - throughput: 5025.125628140703 + inference_time: 191.0 + throughput: 5235.602094240838 estimated_peak_memory_range: - min: 16384 - max: 85791432 + min: 32768 + max: 7329976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -839,14 +839,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jp147wvnp + job_id: j57ye1o95 job_status: Passed torchscript_onnx_qnn: - inference_time: 281.0 - throughput: 
3558.7188612099644 + inference_time: 280.0 + throughput: 3571.4285714285716 estimated_peak_memory_range: - min: 221184 - max: 1720808 + min: 466944 + max: 1807616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -854,7 +854,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jpv6er775 + job_id: jp14nk17p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -863,13 +863,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:58:13Z' + timestamp: '2024-12-11T23:24:16Z' - torchscript_onnx_tflite: - inference_time: 507.0 - throughput: 1972.3865877712033 + inference_time: 500.0 + throughput: 2000.0 estimated_peak_memory_range: - min: 24576 - max: 9009040 + min: 40960 + max: 13597056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -877,14 +877,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: j5we831z5 + job_id: jpxkl80l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 800.0 + throughput: 1250.0 estimated_peak_memory_range: - min: 462848 - max: 6413984 + min: 458752 + max: 6565872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -892,7 +892,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jped8wy75 + job_id: j57ye1n95 job_status: Passed reference_device_info: name: SA8775P ADP @@ -901,13 +901,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:58:15Z' + timestamp: '2024-12-11T23:24:18Z' - torchscript_onnx_tflite: - inference_time: 274.0 - throughput: 3649.6350364963505 + inference_time: 275.0 + throughput: 3636.3636363636365 estimated_peak_memory_range: min: 0 - max: 13086496 + max: 13986480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -915,14 +915,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jp147wvkp + job_id: jgn6zd1q5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 368.0 - throughput: 2717.391304347826 + inference_time: 377.0 + throughput: 2652.5198938992044 estimated_peak_memory_range: - min: 458752 - max: 14460752 + min: 0 + max: 17007712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -930,7 +930,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j5we834z5 + job_id: jpxkl8rl5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -939,10 +939,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:58:17Z' + timestamp: '2024-12-11T23:24:20Z' - torchscript_onnx_qnn: - inference_time: 391.0 - throughput: 2557.544757033248 + inference_time: 379.0 + throughput: 2638.5224274406332 estimated_peak_memory_range: min: 442368 max: 442368 @@ -953,14 +953,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp0zd2625 + job_id: jp3jzl7ng job_status: Passed torchscript_onnx: - inference_time: 546.0 - throughput: 1831.5018315018315 + inference_time: 510.0 + throughput: 1960.7843137254902 estimated_peak_memory_range: - min: 1859584 - max: 1859584 + min: 3182592 + max: 3182592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -968,7 +968,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgn6omnv5 + job_id: jgke2w0ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -977,4 +977,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:58:24Z' + timestamp: '2024-12-11T23:24:27Z' diff --git a/qai_hub_models/models/mediapipe_face_quantized/info.yaml b/qai_hub_models/models/mediapipe_face_quantized/info.yaml index 806d2c92..76de756b 100644 --- a/qai_hub_models/models/mediapipe_face_quantized/info.yaml +++ b/qai_hub_models/models/mediapipe_face_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection tags: - real-time - quantized +imsdk_supported: true research_paper: 
https://arxiv.org/abs/1907.05047 research_paper_title: 'BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs' diff --git a/qai_hub_models/models/mediapipe_face_quantized/perf.yaml b/qai_hub_models/models/mediapipe_face_quantized/perf.yaml index 35c40a50..63a61e6e 100644 --- a/qai_hub_models/models/mediapipe_face_quantized/perf.yaml +++ b/qai_hub_models/models/mediapipe_face_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: MediaPipeFaceDetector universal_assets: - torchscript_onnx_tflite: mn41dwlvn + torchscript_onnx_tflite: mn0jxek3m performance_metrics: - torchscript_onnx_tflite: - inference_time: 274.0 - throughput: 3649.6350364963505 + inference_time: 279.0 + throughput: 3584.2293906810037 estimated_peak_memory_range: min: 12288 - max: 76110328 + max: 26431816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jglvo2e25 + job_id: jg9lz7dwg job_status: Passed torchscript_onnx_qnn: - inference_time: 304.0 - throughput: 3289.4736842105262 + inference_time: 303.0 + throughput: 3300.3300330033003 estimated_peak_memory_range: - min: 212992 - max: 76872880 + min: 237568 + max: 82754816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp3jx18mg + job_id: jg9lz7mwg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:56:47Z' + timestamp: '2024-12-11T23:22:48Z' - torchscript_onnx_tflite: - inference_time: 190.0 - throughput: 5263.1578947368425 + inference_time: 185.0 + throughput: 5405.405405405405 estimated_peak_memory_range: min: 12288 - max: 17751184 + max: 17239648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp3jx1qmg + 
job_id: jgdxdy2rp job_status: Passed torchscript_onnx_qnn: - inference_time: 206.0 - throughput: 4854.368932038835 + inference_time: 210.0 + throughput: 4761.9047619047615 estimated_peak_memory_range: min: 0 - max: 17770736 + max: 16189456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jpv6er4z5 + job_id: jgdxdy3rp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:56:48Z' + timestamp: '2024-12-11T23:22:50Z' - torchscript_onnx_tflite: - inference_time: 167.0 - throughput: 5988.023952095808 + inference_time: 201.0 + throughput: 4975.124378109453 estimated_peak_memory_range: min: 8192 - max: 15178784 + max: 19428816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jgjvo2k1g + job_id: jp4ly6385 job_status: Passed torchscript_onnx_qnn: - inference_time: 175.0 - throughput: 5714.285714285715 + inference_time: 176.0 + throughput: 5681.818181818182 estimated_peak_memory_range: min: 0 - max: 15135552 + max: 15371312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jped8w285 + job_id: jg9lz7m8g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:56:50Z' + timestamp: '2024-12-11T23:22:52Z' - torchscript_onnx_tflite: - inference_time: 765.0 - throughput: 1307.18954248366 + inference_time: 685.0 + throughput: 1459.85401459854 estimated_peak_memory_range: - min: 24576 - max: 19446896 + min: 12288 + max: 22756144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 121 - job_id: jgz38jv45 + job_id: j5mn018dp job_status: Passed torchscript_onnx_qnn: - inference_time: 827.0 - throughput: 1209.1898428053205 + inference_time: 740.0 + throughput: 1351.3513513513512 estimated_peak_memory_range: - min: 221184 - max: 7993728 + min: 32768 + max: 7439456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: j5we83x45 + job_id: jgdxdy3zp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T01:56:52Z' + timestamp: '2024-12-11T23:22:54Z' - torchscript_onnx_tflite: - inference_time: 5221.0 - throughput: 191.5341888527102 + inference_time: 4998.0 + throughput: 200.08003201280513 estimated_peak_memory_range: - min: 28672 - max: 5580112 + min: 12288 + max: 4625712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jg9lky9mg + job_id: jprvlmw0g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T01:56:31Z' + timestamp: '2024-12-11T23:22:34Z' - torchscript_onnx_tflite: - inference_time: 275.0 - throughput: 3636.3636363636365 + inference_time: 278.0 + throughput: 3597.122302158273 estimated_peak_memory_range: min: 12288 - max: 10896488 + max: 10995144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jgdx8q76p + job_id: jpy1okm8p job_status: Passed torchscript_onnx_qnn: - inference_time: 306.0 - throughput: 3267.97385620915 + inference_time: 305.0 + throughput: 3278.688524590164 estimated_peak_memory_range: - min: 229376 - max: 1485152 + min: 221184 + max: 1465568 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp147w3np + job_id: jp4ly6115 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:56:54Z' + timestamp: '2024-12-11T23:22:56Z' - torchscript_onnx_tflite: - inference_time: 2123.0 - throughput: 471.03155911446066 + inference_time: 2155.0 + throughput: 464.0371229698376 estimated_peak_memory_range: - min: 12288 - max: 16791280 + min: 28672 + max: 19567104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp4lmdj25 + job_id: jp8qed1kp job_status: Passed torchscript_onnx_qnn: - inference_time: 2267.0 - throughput: 441.11160123511246 + inference_time: 2313.0 + throughput: 432.33895373973195 estimated_peak_memory_range: - min: 241664 - max: 5834016 + min: 159744 + max: 10436768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jpxk36m85 + job_id: jprvlm07g job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:56:58Z' + timestamp: '2024-12-11T23:23:00Z' - torchscript_onnx_tflite: - inference_time: 273.0 - throughput: 3663.003663003663 + inference_time: 272.0 + throughput: 3676.470588235294 estimated_peak_memory_range: - min: 28672 - max: 5236768 + min: 20480 + max: 10380600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: j5mno6v7p + job_id: j5q6lxvnp job_status: Passed torchscript_onnx_qnn: - inference_time: 307.0 - throughput: 3257.328990228013 + inference_time: 305.0 + throughput: 3278.688524590164 
estimated_peak_memory_range: - min: 221184 - max: 1425144 + min: 225280 + max: 1384144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jgn6omxj5 + job_id: jpy1okxlp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:57:01Z' + timestamp: '2024-12-11T23:23:03Z' - torchscript_onnx_tflite: inference_time: 664.0 throughput: 1506.0240963855422 estimated_peak_memory_range: - min: 32768 - max: 14846080 + min: 12288 + max: 15052752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp2k4936p + job_id: j56y89w6p job_status: Passed torchscript_onnx_qnn: - inference_time: 749.0 - throughput: 1335.1134846461948 + inference_time: 950.0 + throughput: 1052.6315789473683 estimated_peak_memory_range: min: 0 - max: 5794368 + max: 5681152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp2k49j6p + job_id: jp8qedxop job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:57:02Z' + timestamp: '2024-12-11T23:23:05Z' - torchscript_onnx_tflite: - inference_time: 272.0 - throughput: 3676.470588235294 + inference_time: 276.0 + throughput: 3623.1884057971015 estimated_peak_memory_range: min: 12288 - max: 5009952 + max: 11263216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jp0zd2k05 + job_id: jgo2l78qp job_status: Passed torchscript_onnx_qnn: inference_time: 305.0 throughput: 3278.688524590164 estimated_peak_memory_range: - min: 229376 - max: 1927120 + min: 233472 + 
max: 2015808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jp0zd2d05 + job_id: jglvy9ym5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:57:05Z' + timestamp: '2024-12-11T23:23:07Z' - torchscript_onnx_tflite: - inference_time: 617.0 - throughput: 1620.7455429497568 + inference_time: 606.0 + throughput: 1650.1650165016501 estimated_peak_memory_range: min: 12288 - max: 14522064 + max: 20346288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jgkeoqdvg + job_id: jgjvr67vg job_status: Passed torchscript_onnx_qnn: - inference_time: 813.0 - throughput: 1230.0123001230013 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: min: 212992 - max: 5748528 + max: 6379840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jgkeoqovg + job_id: jp3jzlzng job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:57:07Z' + timestamp: '2024-12-11T23:23:09Z' - torchscript_onnx_tflite: - inference_time: 321.0 - throughput: 3115.264797507788 + inference_time: 333.0 + throughput: 3003.003003003003 estimated_peak_memory_range: - min: 24576 - max: 19404560 + min: 393216 + max: 20262480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 121 - job_id: jglvo2725 + job_id: jgz3lqmo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 363.0 - throughput: 2754.8209366391184 + inference_time: 347.0 + throughput: 2881.844380403458 
estimated_peak_memory_range: - min: 208896 - max: 20976672 + min: 221184 + max: 18782432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: jglvo2o25 + job_id: jpv6lylr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:57:09Z' + timestamp: '2024-12-11T23:23:11Z' - torchscript_onnx_qnn: - inference_time: 419.0 - throughput: 2386.634844868735 + inference_time: 408.0 + throughput: 2450.9803921568628 estimated_peak_memory_range: - min: 499712 - max: 499712 + min: 430080 + max: 430080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 151 - job_id: j57ykl6n5 + job_id: j5mn01m9p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,17 +515,17 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:56:56Z' + timestamp: '2024-12-11T23:22:58Z' - name: MediaPipeFaceLandmarkDetector universal_assets: - torchscript_onnx_tflite: mq8kx6yzq + torchscript_onnx_tflite: mqyv38x7q performance_metrics: - torchscript_onnx_tflite: - inference_time: 186.0 - throughput: 5376.344086021505 + inference_time: 180.0 + throughput: 5555.555555555556 estimated_peak_memory_range: min: 16384 - max: 4068296 + max: 79737104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -533,14 +533,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j56yrzqnp + job_id: jp14nk68p job_status: Passed torchscript_onnx_qnn: - inference_time: 220.0 - throughput: 4545.454545454545 + inference_time: 224.0 + throughput: 4464.285714285715 estimated_peak_memory_range: - min: 131072 - max: 10407624 + min: 126976 + max: 3486256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -548,7 +548,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgo2onm1p + job_id: jp14nkj8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -557,13 +557,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:56:47Z' + timestamp: '2024-12-11T23:22:48Z' - torchscript_onnx_tflite: - inference_time: 129.0 - throughput: 7751.937984496124 + inference_time: 142.0 + throughput: 7042.2535211267605 estimated_peak_memory_range: - min: 12288 - max: 14124816 + min: 16384 + max: 15086576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -571,14 +571,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jgo2one1p + job_id: j57ye19v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 162.0 - throughput: 6172.83950617284 + inference_time: 161.0 + throughput: 6211.180124223602 estimated_peak_memory_range: - min: 0 - max: 13009696 + min: 126976 + max: 14547312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -586,7 +586,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgjvo211g + job_id: j5wel07m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -595,13 +595,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:56:49Z' + timestamp: '2024-12-11T23:22:50Z' - torchscript_onnx_tflite: - inference_time: 141.0 - throughput: 7092.198581560284 + inference_time: 145.0 + throughput: 6896.551724137931 estimated_peak_memory_range: - min: 36864 - max: 10496736 + min: 8192 + max: 10442064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -609,14 +609,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jped8w485 + job_id: jpxkl8x35 job_status: Passed torchscript_onnx_qnn: - inference_time: 171.0 - throughput: 5847.953216374269 + inference_time: 167.0 + throughput: 5988.023952095808 estimated_peak_memory_range: min: 0 - max: 10817760 + max: 
10772944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -624,7 +624,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgz38jw45 + job_id: jp14nkj7p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -633,13 +633,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:56:51Z' + timestamp: '2024-12-11T23:22:52Z' - torchscript_onnx_tflite: - inference_time: 406.0 - throughput: 2463.054187192118 + inference_time: 411.0 + throughput: 2433.0900243309 estimated_peak_memory_range: min: 12288 - max: 12544944 + max: 12189296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -647,14 +647,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j5we83m45 + job_id: jgn6zdkk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 498.0 - throughput: 2008.0321285140562 + inference_time: 497.0 + throughput: 2012.0724346076458 estimated_peak_memory_range: - min: 12288 - max: 8090096 + min: 131072 + max: 6826928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -662,7 +662,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jg9lky8mg + job_id: j57ye1495 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -671,13 +671,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T01:56:53Z' + timestamp: '2024-12-11T23:22:55Z' - torchscript_onnx_tflite: - inference_time: 2963.0 - throughput: 337.4957813027337 + inference_time: 3071.0 + throughput: 325.626831650928 estimated_peak_memory_range: - min: 12288 - max: 6507584 + min: 16384 + max: 2414432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -685,7 +685,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp147wqnp + job_id: jp2krqerp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -694,13 +694,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
QCS8250 Proxy - timestamp: '2024-11-26T01:56:31Z' + timestamp: '2024-12-11T23:22:34Z' - torchscript_onnx_tflite: - inference_time: 180.0 - throughput: 5555.555555555556 + inference_time: 184.0 + throughput: 5434.782608695652 estimated_peak_memory_range: min: 12288 - max: 3579304 + max: 3959216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -708,14 +708,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j57yklvn5 + job_id: jp0zm8695 job_status: Passed torchscript_onnx_qnn: - inference_time: 221.0 - throughput: 4524.886877828054 + inference_time: 215.0 + throughput: 4651.162790697675 estimated_peak_memory_range: min: 139264 - max: 1358640 + max: 1764720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -723,7 +723,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jgdx8q06p + job_id: jpxkl84l5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -732,13 +732,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:56:55Z' + timestamp: '2024-12-11T23:22:57Z' - torchscript_onnx_tflite: - inference_time: 997.0 - throughput: 1003.0090270812437 + inference_time: 983.0 + throughput: 1017.293997965412 estimated_peak_memory_range: min: 16384 - max: 10734224 + max: 13840032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -746,14 +746,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jpxk36e85 + job_id: jgke2w8wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1197.0 - throughput: 835.421888053467 + inference_time: 1247.0 + throughput: 801.924619085806 estimated_peak_memory_range: min: 0 - max: 5917024 + max: 10365456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -761,7 +761,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j5mno647p + job_id: jp2krqwqp job_status: Passed reference_device_info: name: SA7255P ADP @@ -770,13 +770,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:56:59Z' + timestamp: '2024-12-11T23:23:01Z' - torchscript_onnx_tflite: - inference_time: 180.0 - throughput: 5555.555555555556 + inference_time: 179.0 + throughput: 5586.592178770949 estimated_peak_memory_range: - min: 16384 - max: 9884864 + min: 12288 + max: 3574768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -784,14 +784,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jprvo21kg + job_id: jglvy9lj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 222.0 - throughput: 4504.504504504504 + inference_time: 221.0 + throughput: 4524.886877828054 estimated_peak_memory_range: min: 143360 - max: 1404992 + max: 1686080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -799,7 +799,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jprvo29kg + job_id: jp0zm8jn5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -808,13 +808,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:57:01Z' + timestamp: '2024-12-11T23:23:03Z' - torchscript_onnx_tflite: - inference_time: 482.0 - throughput: 2074.688796680498 + inference_time: 497.0 + throughput: 2012.0724346076458 estimated_peak_memory_range: min: 12288 - max: 9117184 + max: 9041792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -822,14 +822,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jpy1qjv0p + job_id: jp3jzl63g job_status: Passed torchscript_onnx_qnn: - inference_time: 690.0 - throughput: 1449.2753623188405 + inference_time: 595.0 + throughput: 1680.672268907563 estimated_peak_memory_range: min: 0 - max: 5824288 + max: 5958736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -837,7 +837,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jpy1qjn0p + job_id: jgke2w4ng job_status: Passed reference_device_info: name: SA8295P ADP @@ -846,13 +846,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:57:03Z' + timestamp: '2024-12-11T23:23:05Z' - torchscript_onnx_tflite: - inference_time: 187.0 - throughput: 5347.593582887701 + inference_time: 185.0 + throughput: 5405.405405405405 estimated_peak_memory_range: - min: 12288 - max: 10001800 + min: 16384 + max: 4560224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -860,14 +860,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: jp8q6m8qp + job_id: jpv6lydk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 221.0 - throughput: 4524.886877828054 + inference_time: 217.0 + throughput: 4608.294930875576 estimated_peak_memory_range: - min: 135168 - max: 1630304 + min: 16384 + max: 1292000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -875,7 +875,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jp8q6m6qp + job_id: j56y898yp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -884,13 +884,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:57:05Z' + timestamp: '2024-12-11T23:23:07Z' - torchscript_onnx_tflite: - inference_time: 445.0 - throughput: 2247.191011235955 + inference_time: 455.0 + throughput: 2197.802197802198 estimated_peak_memory_range: min: 16384 - max: 8890032 + max: 14077648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -898,14 +898,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j5q6zrwep + job_id: jped70zo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 630.0 - throughput: 1587.3015873015872 + inference_time: 664.0 + throughput: 1506.0240963855422 estimated_peak_memory_range: - min: 135168 - max: 5944144 + min: 126976 + max: 6258576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -913,7 +913,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j5q6zrzep + job_id: jgo2l7lkp job_status: Passed 
reference_device_info: name: SA8775P ADP @@ -922,13 +922,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:57:07Z' + timestamp: '2024-12-11T23:23:10Z' - torchscript_onnx_tflite: - inference_time: 224.0 - throughput: 4464.285714285715 + inference_time: 214.0 + throughput: 4672.897196261682 estimated_peak_memory_range: - min: 32768 - max: 14570336 + min: 16384 + max: 16929760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -936,14 +936,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 117 - job_id: j56yrzvnp + job_id: j5wel0735 job_status: Passed torchscript_onnx_qnn: - inference_time: 261.0 - throughput: 3831.417624521073 + inference_time: 267.0 + throughput: 3745.318352059925 estimated_peak_memory_range: min: 126976 - max: 15528160 + max: 13832896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -951,7 +951,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j56yrzrnp + job_id: jgjvr6reg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -960,13 +960,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:57:09Z' + timestamp: '2024-12-11T23:23:12Z' - torchscript_onnx_qnn: - inference_time: 337.0 - throughput: 2967.359050445104 + inference_time: 317.0 + throughput: 3154.5741324921137 estimated_peak_memory_range: - min: 593920 - max: 593920 + min: 618496 + max: 618496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -974,7 +974,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jp4lmd825 + job_id: jgn6zdnq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -983,4 +983,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:56:57Z' + timestamp: '2024-12-11T23:22:59Z' diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index 
04bca52a..999b1dbc 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -238,9 +238,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False - ) + parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 270a3f5b..ebbdfa34 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MediaPipeHandDetector universal_assets: - torchscript_onnx_tflite: mq3e2ddkm - torchscript_onnx: mnwe1ye3n + torchscript_onnx_tflite: mnz1vp5xq + torchscript_onnx: mq36eg8lq performance_metrics: - torchscript_onnx_tflite: - inference_time: 722.0 - throughput: 1385.0415512465374 + inference_time: 708.0 + throughput: 1412.4293785310736 estimated_peak_memory_range: - min: 16384 - max: 22683976 + min: 24576 + max: 142495736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgdx8qozp + job_id: jp0zm8k95 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 791.0 + throughput: 1264.2225031605562 + estimated_peak_memory_range: + min: 802816 + max: 22333968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jgn6zdok5 job_status: Passed torchscript_onnx: - inference_time: 1182.0 - throughput: 846.0236886632825 + inference_time: 1169.0 + throughput: 855.4319931565441 estimated_peak_memory_range: min: 12288 - max: 6006064 + max: 6315120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
196 - job_id: jgz38jxx5 + job_id: jpxkl8d35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:55:44Z' + timestamp: '2024-12-11T23:21:44Z' - torchscript_onnx_tflite: - inference_time: 518.0 - throughput: 1930.5019305019305 + inference_time: 520.0 + throughput: 1923.076923076923 estimated_peak_memory_range: min: 16384 - max: 19484960 + max: 21870080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp4lmdv15 + job_id: jgke2wdwg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 576.0 + throughput: 1736.111111111111 + estimated_peak_memory_range: + min: 802816 + max: 20645248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jp2krq4rp job_status: Passed torchscript_onnx: - inference_time: 859.0 - throughput: 1164.1443538998835 + inference_time: 869.0 + throughput: 1150.7479861910242 estimated_peak_memory_range: min: 0 - max: 73507648 + max: 76091888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jg9lky38g + job_id: jgn6zd7k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:55:46Z' + timestamp: '2024-12-11T23:21:46Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 520.0 + throughput: 1923.076923076923 estimated_peak_memory_range: - min: 12288 - max: 16454160 + min: 0 + max: 20632528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5mno639p + 
job_id: jglvy97j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 497.0 + throughput: 2012.0724346076458 + estimated_peak_memory_range: + min: 0 + max: 20455984 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jp0zm8d95 job_status: Passed torchscript_onnx: - inference_time: 728.0 - throughput: 1373.6263736263736 + inference_time: 880.0 + throughput: 1136.3636363636363 estimated_peak_memory_range: min: 0 - max: 37756304 + max: 37177472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jgdx8qrzp + job_id: jp2krqvrp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:55:48Z' + timestamp: '2024-12-11T23:21:48Z' - torchscript_onnx_tflite: - inference_time: 713.0 - throughput: 1402.5245441795232 + inference_time: 714.0 + throughput: 1400.5602240896358 estimated_peak_memory_range: min: 16384 - max: 78225376 + max: 5827944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jprvo2e7g + job_id: jp3jzl83g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 795.0 + throughput: 1257.861635220126 + estimated_peak_memory_range: + min: 823296 + max: 2562440 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jgke2wowg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:55:08Z' + timestamp: '2024-12-11T23:21:29Z' - torchscript_onnx_tflite: - inference_time: 24782.0 - throughput: 40.351868291501894 + 
inference_time: 24749.0 + throughput: 40.40567295648309 estimated_peak_memory_range: - min: 282624 - max: 17653584 + min: 249856 + max: 18726464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jpy1qj6lp + job_id: jpv6lyek5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 25027.0 + throughput: 39.95684660566588 + estimated_peak_memory_range: + min: 765952 + max: 11243808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jp3jzlx3g job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:55:09Z' + timestamp: '2024-12-11T23:21:33Z' - torchscript_onnx_tflite: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 12288 - max: 22835224 + min: 20480 + max: 22503280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jp8q6mzop + job_id: jped708o5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 801.0 + throughput: 1248.4394506866417 + estimated_peak_memory_range: + min: 819200 + max: 2099488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jpv6ly9k5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:55:11Z' + timestamp: '2024-12-11T23:21:35Z' - torchscript_onnx_tflite: - inference_time: 1754.0 - throughput: 570.1254275940707 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: - min: 106496 - 
max: 12013136 + min: 12288 + max: 11882416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j5q6zr3op + job_id: j5wel0835 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2082.0 + throughput: 480.3073967339097 + estimated_peak_memory_range: + min: 0 + max: 5971280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jped70lo5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:55:13Z' + timestamp: '2024-12-11T23:21:37Z' - torchscript_onnx_tflite: - inference_time: 723.0 - throughput: 1383.1258644536654 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 221184 - max: 22845216 + min: 20480 + max: 23149496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j56yrz1yp + job_id: jp14nk78p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 793.0 + throughput: 1261.034047919294 + estimated_peak_memory_range: + min: 16384 + max: 1713344 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: j5wel0135 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:55:16Z' + timestamp: '2024-12-11T23:21:39Z' - torchscript_onnx_tflite: - inference_time: 1526.0 - throughput: 655.307994757536 + inference_time: 1516.0 + throughput: 659.6306068601583 estimated_peak_memory_range: - min: 172032 - max: 16354816 + min: 16384 + max: 21036128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 
+409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgo2onvkp + job_id: j57ye1kv5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1722.0 + throughput: 580.7200929152149 + estimated_peak_memory_range: + min: 802816 + max: 6907088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jp14nkv8p job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:55:17Z' + timestamp: '2024-12-11T23:21:41Z' - torchscript_onnx_tflite: - inference_time: 1292.0 - throughput: 773.9938080495356 + inference_time: 1294.0 + throughput: 772.7975270479135 estimated_peak_memory_range: min: 16384 - max: 19431456 + max: 19801536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jgjvo2leg + job_id: jpxkl8335 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1376.0 + throughput: 726.7441860465116 + estimated_peak_memory_range: + min: 802816 + max: 24072048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: j57ye17v5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:55:19Z' - - torchscript_onnx: - inference_time: 1190.0 - throughput: 840.3361344537815 + timestamp: '2024-12-11T23:21:43Z' + - torchscript_onnx_qnn: + inference_time: 932.0 + throughput: 1072.961373390558 estimated_peak_memory_range: - min: 4288512 - max: 4288512 + min: 786432 + max: 786432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 195 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 195 + job_id: jglvy9oj5 + 
job_status: Passed + torchscript_onnx: + inference_time: 1192.0 + throughput: 838.9261744966443 + estimated_peak_memory_range: + min: 5742592 + max: 5742592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jg9lky3mg + job_id: jp0zm8v95 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,18 +509,18 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:55:50Z' + timestamp: '2024-12-11T23:21:50Z' - name: MediaPipeHandLandmarkDetector universal_assets: - torchscript_onnx_tflite: mnjxkppyq - torchscript_onnx: mn1z83zzm + torchscript_onnx_tflite: mq36eg9lq + torchscript_onnx: mm6kvgd4n performance_metrics: - torchscript_onnx_tflite: - inference_time: 1019.0 - throughput: 981.3542688910696 + inference_time: 995.0 + throughput: 1005.0251256281407 estimated_peak_memory_range: - min: 24576 - max: 37742224 + min: 20480 + max: 9536152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -363,14 +528,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j57yklx95 + job_id: jp8qed8kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1133.0 + throughput: 882.61253309797 + estimated_peak_memory_range: + min: 49152 + max: 7576520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jprvlmo0g job_status: Passed torchscript_onnx: - inference_time: 1544.0 - throughput: 647.6683937823834 + inference_time: 1523.0 + throughput: 656.5988181221273 estimated_peak_memory_range: - min: 12288 - max: 8044224 + min: 16384 + max: 28857432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -378,7 +558,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j5we83dm5 + job_id: j5mn01ddp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -387,13 
+567,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:55:44Z' + timestamp: '2024-12-11T23:21:45Z' - torchscript_onnx_tflite: - inference_time: 731.0 - throughput: 1367.9890560875513 + inference_time: 749.0 + throughput: 1335.1134846461948 estimated_peak_memory_range: - min: 12288 - max: 18439632 + min: 16384 + max: 22486640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -401,14 +581,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpxk36yl5 + job_id: j5q6lxwnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 827.0 + throughput: 1209.1898428053205 + estimated_peak_memory_range: + min: 72998912 + max: 97668656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jpy1okq8p job_status: Passed torchscript_onnx: - inference_time: 1135.0 - throughput: 881.0572687224669 + inference_time: 1127.0 + throughput: 887.3114463176574 estimated_peak_memory_range: min: 0 - max: 72313104 + max: 75117392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -416,7 +611,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jp147wd7p + job_id: jprvlmn0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -425,13 +620,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:55:46Z' + timestamp: '2024-12-11T23:21:46Z' - torchscript_onnx_tflite: - inference_time: 698.0 - throughput: 1432.6647564469913 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: min: 12288 - max: 17536928 + max: 20605200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -439,14 +634,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgn6om3q5 + job_id: j56y89v6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 756.0 + throughput: 
1322.7513227513227 + estimated_peak_memory_range: + min: 798720 + max: 23050992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jp8qed6kp job_status: Passed torchscript_onnx: - inference_time: 1066.0 - throughput: 938.0863039399625 + inference_time: 1069.0 + throughput: 935.4536950420954 estimated_peak_memory_range: min: 0 - max: 41667472 + max: 42137696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -454,7 +664,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j5we83d45 + job_id: jpy1ok78p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -463,13 +673,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:55:48Z' + timestamp: '2024-12-11T23:21:48Z' - torchscript_onnx_tflite: - inference_time: 1001.0 - throughput: 999.000999000999 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: - min: 0 - max: 6153664 + min: 16384 + max: 48920768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -477,7 +687,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp2k49lqp + job_id: jgo2l7mqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1090.0 + throughput: 917.4311926605504 + estimated_peak_memory_range: + min: 823296 + max: 1959896 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: j5q6lxznp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -486,13 +711,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:55:08Z' + timestamp: '2024-12-11T23:21:29Z' - torchscript_onnx_tflite: - inference_time: 35495.0 - throughput: 28.17298211015636 + inference_time: 35509.0 + throughput: 28.161874454363684 estimated_peak_memory_range: - min: 
65536 - max: 16895456 + min: 40960 + max: 18535728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +725,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp0zd2ln5 + job_id: jgjvr6ovg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 35781.0 + throughput: 27.947793521701463 + estimated_peak_memory_range: + min: 782336 + max: 11156416 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jgo2l7oqp job_status: Passed reference_device_info: name: SA7255P ADP @@ -509,13 +749,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:55:10Z' + timestamp: '2024-12-11T23:21:33Z' - torchscript_onnx_tflite: - inference_time: 1018.0 - throughput: 982.3182711198428 + inference_time: 1005.0 + throughput: 995.0248756218906 estimated_peak_memory_range: - min: 16384 - max: 48829048 + min: 20480 + max: 6808904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -523,7 +763,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgkeoq3ng + job_id: jgz3lq8o5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1094.0 + throughput: 914.0767824497258 + estimated_peak_memory_range: + min: 843776 + max: 2073784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jgjvr6wvg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -532,13 +787,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:55:12Z' + timestamp: '2024-12-11T23:21:36Z' - torchscript_onnx_tflite: - inference_time: 2271.0 - throughput: 440.33465433729634 + inference_time: 2307.0 + throughput: 433.4633723450368 estimated_peak_memory_range: min: 16384 - max: 12594960 + max: 13110384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -546,7 
+801,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jglvo2km5 + job_id: jg9lz7kwg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2557.0 + throughput: 391.08330074305826 + estimated_peak_memory_range: + min: 0 + max: 5968560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jgz3lq4o5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -555,13 +825,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:55:14Z' + timestamp: '2024-12-11T23:21:37Z' - torchscript_onnx_tflite: - inference_time: 1023.0 - throughput: 977.5171065493646 + inference_time: 1027.0 + throughput: 973.7098344693281 estimated_peak_memory_range: - min: 0 - max: 239632512 + min: 20480 + max: 92623064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -569,7 +839,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp3jx1mng + job_id: jgdxdy8rp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1106.0 + throughput: 904.1591320072333 + estimated_peak_memory_range: + min: 819200 + max: 1966992 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jg9lz7xwg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -578,13 +863,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:55:16Z' + timestamp: '2024-12-11T23:21:39Z' - torchscript_onnx_tflite: - inference_time: 2213.0 - throughput: 451.8752824220515 + inference_time: 2224.0 + throughput: 449.64028776978415 estimated_peak_memory_range: - min: 16384 - max: 18224464 + min: 28672 + max: 21333664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -592,7 +877,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpv6erwr5 + job_id: jp4ly6m85 
+ job_status: Passed + torchscript_onnx_qnn: + inference_time: 2525.0 + throughput: 396.03960396039605 + estimated_peak_memory_range: + min: 802816 + max: 6936672 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jgdxdyzrp job_status: Passed reference_device_info: name: SA8775P ADP @@ -601,13 +901,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:55:18Z' + timestamp: '2024-12-11T23:21:41Z' - torchscript_onnx_tflite: - inference_time: 1795.0 - throughput: 557.1030640668523 + inference_time: 1817.0 + throughput: 550.357732526142 estimated_peak_memory_range: min: 16384 - max: 17026192 + max: 25387472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -615,7 +915,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jped8wvv5 + job_id: j5mn01odp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1889.0 + throughput: 529.3806246691371 + estimated_peak_memory_range: + min: 1126400 + max: 19021680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: jp4ly6985 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -624,13 +939,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:55:20Z' - - torchscript_onnx: - inference_time: 1649.0 - throughput: 606.4281382656155 + timestamp: '2024-12-11T23:21:43Z' + - torchscript_onnx_qnn: + inference_time: 1348.0 + throughput: 741.839762611276 + estimated_peak_memory_range: + min: 786432 + max: 786432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 208 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 208 + job_id: j56y89r6p + job_status: Passed + torchscript_onnx: + inference_time: 1622.0 + throughput: 616.5228113440197 estimated_peak_memory_range: - min: 
6778880 - max: 6778880 + min: 7913472 + max: 7913472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -638,7 +968,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jp147wdnp + job_id: jp8qed4kp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -647,4 +977,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:55:50Z' + timestamp: '2024-12-11T23:21:50Z' diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index b6cccf9d..20dafc39 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -238,9 +238,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser( - model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False - ) + parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 74834048..e43b29df 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MediaPipePoseDetector universal_assets: - torchscript_onnx_tflite: mnlvk1p3m - torchscript_onnx: mqkk48pkq + torchscript_onnx_tflite: mnw8errkn + torchscript_onnx: mmr36556m performance_metrics: - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: min: 16384 - max: 22853696 + max: 127318568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgjvo2zvg + job_id: j57ye1jl5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 841.0 + throughput: 1189.0606420927468 + 
estimated_peak_memory_range: + min: 28672 + max: 6666472 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: j5wel0m65 job_status: Passed torchscript_onnx: - inference_time: 997.0 - throughput: 1003.0090270812437 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 16384 - max: 3862464 + min: 212992 + max: 35299768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jpy1qjjlp + job_id: jped70415 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:54:32Z' + timestamp: '2024-12-11T23:20:30Z' - torchscript_onnx_tflite: - inference_time: 560.0 - throughput: 1785.7142857142858 + inference_time: 558.0 + throughput: 1792.1146953405018 estimated_peak_memory_range: - min: 16384 - max: 17659376 + min: 28127232 + max: 47456512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgz38joo5 + job_id: jpxkl8715 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 609.0 + throughput: 1642.0361247947455 + estimated_peak_memory_range: + min: 0 + max: 21406960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jp14nkq2p job_status: Passed torchscript_onnx: - inference_time: 744.0 - throughput: 1344.0860215053763 + inference_time: 738.0 + throughput: 1355.0135501355014 estimated_peak_memory_range: min: 0 - max: 52076336 + max: 52144800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jp8q6mmop + job_id: j5wel0x65 job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:54:33Z' + timestamp: '2024-12-11T23:20:32Z' - torchscript_onnx_tflite: - inference_time: 570.0 - throughput: 1754.3859649122808 + inference_time: 562.0 + throughput: 1779.3594306049822 estimated_peak_memory_range: min: 12288 - max: 12989184 + max: 17830032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jg9lkyjwg + job_id: jgn6zd9r5 job_status: Passed - torchscript_onnx: - inference_time: 756.0 - throughput: 1322.7513227513227 + torchscript_onnx_qnn: + inference_time: 497.0 + throughput: 2012.0724346076458 estimated_peak_memory_range: min: 0 - max: 26774384 + max: 17425792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: j57ye1vl5 + job_status: Passed + torchscript_onnx: + inference_time: 754.0 + throughput: 1326.2599469496022 + estimated_peak_memory_range: + min: 65536 + max: 27075712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: j5q6zrrop + job_id: jp14nk32p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:54:35Z' + timestamp: '2024-12-11T23:20:34Z' - torchscript_onnx_tflite: - inference_time: 770.0 - throughput: 1298.7012987012988 + inference_time: 768.0 + throughput: 1302.0833333333333 estimated_peak_memory_range: - min: 24576 - max: 60987112 + min: 20480 + max: 5165248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgdx8qerp + job_id: jp2krq74p + 
job_status: Passed + torchscript_onnx_qnn: + inference_time: 821.0 + throughput: 1218.026796589525 + estimated_peak_memory_range: + min: 229376 + max: 1585912 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jpxkl8e15 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:53:58Z' + timestamp: '2024-12-11T23:20:15Z' - torchscript_onnx_tflite: - inference_time: 37961.0 - throughput: 26.342825531466506 + inference_time: 37959.0 + throughput: 26.34421349350615 estimated_peak_memory_range: - min: 98304 - max: 13676320 + min: 106496 + max: 17386784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jg9lkyj8g + job_id: jp0zm8165 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 38175.0 + throughput: 26.195153896529142 + estimated_peak_memory_range: + min: 167936 + max: 10754016 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jp2krq34p job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:54:00Z' + timestamp: '2024-12-11T23:20:19Z' - torchscript_onnx_tflite: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 16384 - max: 7331480 + min: 28672 + max: 6964888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jgdx8qezp + job_id: jgke2wl2g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 823.0 + throughput: 1215.0668286755772 
+ estimated_peak_memory_range: + min: 225280 + max: 1514768 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jp0zm8e65 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:54:02Z' + timestamp: '2024-12-11T23:20:21Z' - torchscript_onnx_tflite: - inference_time: 2335.0 - throughput: 428.2655246252677 + inference_time: 2350.0 + throughput: 425.531914893617 estimated_peak_memory_range: min: 16384 - max: 10406688 + max: 10077856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jp4lmdk15 + job_id: jglvy9085 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 2634.0 + throughput: 379.65072133637057 + estimated_peak_memory_range: + min: 0 + max: 5639712 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jgke2wr2g job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:54:04Z' + timestamp: '2024-12-11T23:20:23Z' - torchscript_onnx_tflite: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 774.0 + throughput: 1291.9896640826873 estimated_peak_memory_range: - min: 16384 - max: 95384904 + min: 32768 + max: 12164704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: j5mno6q9p + job_id: jp3jzl4lg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 820.0 + throughput: 1219.5121951219512 + estimated_peak_memory_range: + min: 258048 + max: 2039192 + primary_compute_unit: NPU + precision: fp16 + layer_info: + 
layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jglvy9e85 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:54:06Z' + timestamp: '2024-12-11T23:20:25Z' - torchscript_onnx_tflite: - inference_time: 1784.0 - throughput: 560.5381165919282 + inference_time: 1786.0 + throughput: 559.9104143337066 estimated_peak_memory_range: - min: 28672 - max: 11711472 + min: 61440 + max: 18216096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jprvo287g + job_id: jpv6ly1j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1952.0 + throughput: 512.2950819672132 + estimated_peak_memory_range: + min: 0 + max: 6099088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jp3jzlqlg job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:54:07Z' + timestamp: '2024-12-11T23:20:27Z' - torchscript_onnx_tflite: - inference_time: 1900.0 - throughput: 526.3157894736842 + inference_time: 1896.0 + throughput: 527.4261603375527 estimated_peak_memory_range: - min: 61440 - max: 14497520 + min: 65536 + max: 20746224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jpy1qjrlp + job_id: jped70r15 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1979.0 + throughput: 505.3057099545225 + estimated_peak_memory_range: + min: 204800 + max: 14660096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jpv6lyzj5 job_status: 
Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:54:09Z' - - torchscript_onnx: - inference_time: 1073.0 - throughput: 931.9664492078285 + timestamp: '2024-12-11T23:20:29Z' + - torchscript_onnx_qnn: + inference_time: 972.0 + throughput: 1028.80658436214 + estimated_peak_memory_range: + min: 450560 + max: 450560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 138 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 138 + job_id: jgn6zdrr5 + job_status: Passed + torchscript_onnx: + inference_time: 1042.0 + throughput: 959.6928982725528 estimated_peak_memory_range: - min: 4243456 - max: 4243456 + min: 3973120 + max: 3973120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: j56yrznyp + job_id: j5wel0x35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,18 +509,18 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:54:37Z' + timestamp: '2024-12-11T23:20:36Z' - name: MediaPipePoseLandmarkDetector universal_assets: - torchscript_onnx_tflite: mno35gwkn - torchscript_onnx: mq3e2d3km + torchscript_onnx_tflite: mq8dkgr3m + torchscript_onnx: mn1wzggvm performance_metrics: - torchscript_onnx_tflite: - inference_time: 821.0 - throughput: 1218.026796589525 + inference_time: 811.0 + throughput: 1233.0456226880394 estimated_peak_memory_range: - min: 20480 - max: 46160520 + min: 16384 + max: 8390640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -363,14 +528,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jped8weo5 + job_id: jp4ly6xv5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 907.0 + throughput: 1102.5358324145534 + estimated_peak_memory_range: + min: 344064 + max: 35486144 + primary_compute_unit: 
NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jg9lz79lg job_status: Passed torchscript_onnx: - inference_time: 1309.0 - throughput: 763.9419404125287 + inference_time: 1315.0 + throughput: 760.4562737642585 estimated_peak_memory_range: min: 12288 - max: 45934928 + max: 9419080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -378,7 +558,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jp0zd22n5 + job_id: jgz3lqvk5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -387,13 +567,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:54:32Z' + timestamp: '2024-12-11T23:20:31Z' - torchscript_onnx_tflite: inference_time: 603.0 throughput: 1658.374792703151 estimated_peak_memory_range: min: 12288 - max: 23368144 + max: 25693040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -401,14 +581,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5we83235 + job_id: j5mn01wwp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 658.0 + throughput: 1519.756838905775 + estimated_peak_memory_range: + min: 0 + max: 26669504 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jgdxdy7ep job_status: Passed torchscript_onnx: - inference_time: 968.0 - throughput: 1033.0578512396694 + inference_time: 973.0 + throughput: 1027.749229188078 estimated_peak_memory_range: - min: 172032 - max: 102465040 + min: 0 + max: 103782608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -416,7 +611,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jgkeoqqng + job_id: jg9lz78lg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -425,13 +620,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-11-26T01:54:34Z' + timestamp: '2024-12-11T23:20:32Z' - torchscript_onnx_tflite: - inference_time: 553.0 - throughput: 1808.3182640144666 + inference_time: 552.0 + throughput: 1811.5942028985507 estimated_peak_memory_range: - min: 8192 - max: 19978544 + min: 12288 + max: 23774416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -439,14 +634,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp147wy8p + job_id: jprvlm49g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 610.0 + throughput: 1639.344262295082 + estimated_peak_memory_range: + min: 0 + max: 23369600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jp4ly6jv5 job_status: Passed torchscript_onnx: - inference_time: 912.0 - throughput: 1096.4912280701753 + inference_time: 911.0 + throughput: 1097.694840834248 estimated_peak_memory_range: min: 0 - max: 46905664 + max: 47374480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -454,7 +664,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jglvo23m5 + job_id: jgdxdy0ep job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -463,13 +673,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:54:36Z' + timestamp: '2024-12-11T23:20:34Z' - torchscript_onnx_tflite: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 799.0 + throughput: 1251.5644555694619 estimated_peak_memory_range: - min: 16384 - max: 45654120 + min: 20480 + max: 10988608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -477,7 +687,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j5we832m5 + job_id: jpy1ok47p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 911.0 + throughput: 1097.694840834248 + estimated_peak_memory_range: + min: 819200 + max: 2034496 + primary_compute_unit: 
NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: j5mn01vwp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -486,13 +711,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:53:59Z' + timestamp: '2024-12-11T23:20:15Z' - torchscript_onnx_tflite: - inference_time: 17243.0 - throughput: 57.99454851243983 + inference_time: 17242.0 + throughput: 57.997912075165296 estimated_peak_memory_range: - min: 20480 - max: 19458368 + min: 32768 + max: 21488784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +725,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp147wy7p + job_id: jp8qed3xp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 17628.0 + throughput: 56.72793283412752 + estimated_peak_memory_range: + min: 757760 + max: 11370032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jpy1okv7p job_status: Passed reference_device_info: name: SA7255P ADP @@ -509,13 +749,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:54:01Z' + timestamp: '2024-12-11T23:20:19Z' - torchscript_onnx_tflite: - inference_time: 803.0 - throughput: 1245.3300124533 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: min: 16384 - max: 11168744 + max: 198801696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -523,7 +763,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j57ykl095 + job_id: j5q6lx74p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 947.0 + throughput: 1055.9662090813094 + estimated_peak_memory_range: + min: 827392 + max: 1978112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + 
job_id: jp8qedwxp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -532,13 +787,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:54:03Z' + timestamp: '2024-12-11T23:20:21Z' - torchscript_onnx_tflite: - inference_time: 1417.0 - throughput: 705.7163020465773 + inference_time: 1500.0 + throughput: 666.6666666666666 estimated_peak_memory_range: min: 16384 - max: 15180768 + max: 14677184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -546,7 +801,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jpxk36nl5 + job_id: j56y8930p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1839.0 + throughput: 543.773790103317 + estimated_peak_memory_range: + min: 802816 + max: 7049792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: j5q6lx94p job_status: Passed reference_device_info: name: SA8295P ADP @@ -555,13 +825,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:54:04Z' + timestamp: '2024-12-11T23:20:23Z' - torchscript_onnx_tflite: - inference_time: 832.0 - throughput: 1201.923076923077 + inference_time: 815.0 + throughput: 1226.993865030675 estimated_peak_memory_range: - min: 16384 - max: 245486296 + min: 20480 + max: 46924240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -569,7 +839,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jgn6omlq5 + job_id: jgo2l71xp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 936.0 + throughput: 1068.3760683760684 + estimated_peak_memory_range: + min: 20480 + max: 1295336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: j56y89q0p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -578,13 +863,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:54:06Z' + timestamp: '2024-12-11T23:20:25Z' - torchscript_onnx_tflite: - inference_time: 1629.0 - throughput: 613.8735420503376 + inference_time: 1638.0 + throughput: 610.5006105006105 estimated_peak_memory_range: min: 16384 - max: 20679984 + max: 23538560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -592,7 +877,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp2k490qp + job_id: jgjvr60xg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1938.0 + throughput: 515.9958720330237 + estimated_peak_memory_range: + min: 802816 + max: 6902096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jgo2l7exp job_status: Passed reference_device_info: name: SA8775P ADP @@ -601,13 +901,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:54:08Z' + timestamp: '2024-12-11T23:20:27Z' - torchscript_onnx_tflite: - inference_time: 1004.0 - throughput: 996.01593625498 + inference_time: 1030.0 + throughput: 970.8737864077669 estimated_peak_memory_range: min: 16384 - max: 18189824 + max: 23016176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -615,7 +915,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jp0zd23n5 + job_id: jgz3lqxk5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1110.0 + throughput: 900.9009009009009 + estimated_peak_memory_range: + min: 802816 + max: 22529296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jgjvr6kxg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -624,13 +939,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:54:09Z' - - torchscript_onnx: - inference_time: 
1389.0 - throughput: 719.9424046076314 + timestamp: '2024-12-11T23:20:29Z' + - torchscript_onnx_qnn: + inference_time: 1108.0 + throughput: 902.5270758122743 + estimated_peak_memory_range: + min: 786432 + max: 786432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jprvlm19g + job_status: Passed + torchscript_onnx: + inference_time: 1352.0 + throughput: 739.6449704142012 estimated_peak_memory_range: - min: 8175616 - max: 8175616 + min: 9560064 + max: 9560064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -638,7 +968,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 291 - job_id: jp3jx1eng + job_id: jg9lz78wg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -647,4 +977,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:54:38Z' + timestamp: '2024-12-11T23:20:36Z' diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 997f8fa3..e6f1a539 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MediaPipe-Selfie-Segmentation universal_assets: - torchscript_onnx_tflite: mqex93jvn - torchscript_onnx: mn41r42zn + torchscript_onnx_tflite: mq21435jm + torchscript_onnx: mmxe79o2n performance_metrics: - torchscript_onnx_tflite: - inference_time: 706.0 - throughput: 1416.4305949008499 + inference_time: 701.0 + throughput: 1426.5335235378031 estimated_peak_memory_range: - min: 20480 - max: 42377640 + min: 16384 + max: 6381488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j5q6zr6np + job_id: jped70k15 job_status: Passed torchscript_onnx_qnn: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 
772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: - min: 806912 - max: 6694256 + min: 811008 + max: 71873360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jg9lky0wg + job_id: jgn6zder5 job_status: Passed torchscript_onnx: - inference_time: 1331.0 - throughput: 751.3148009015778 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 487424 - max: 1970688 + min: 802816 + max: 25757328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp0zd2495 + job_id: jgo2l7vxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:53:27Z' + timestamp: '2024-12-11T23:19:25Z' - torchscript_onnx_tflite: - inference_time: 468.0 - throughput: 2136.7521367521367 + inference_time: 470.0 + throughput: 2127.659574468085 estimated_peak_memory_range: min: 12288 - max: 13662640 + max: 14153056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jglvo2vj5 + job_id: jgz3lqrk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 518.0 - throughput: 1930.5019305019305 + inference_time: 519.0 + throughput: 1926.7822736030828 estimated_peak_memory_range: min: 0 - max: 14979184 + max: 13647616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp147w28p + job_id: jprvlmy9g job_status: Passed torchscript_onnx: - inference_time: 883.0 - throughput: 1132.5028312570782 + inference_time: 891.0 + throughput: 1122.334455667789 estimated_peak_memory_range: min: 0 - max: 37497712 + max: 37529616 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jp8q6m2kp + job_id: jpv6lywj5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:53:28Z' + timestamp: '2024-12-11T23:19:26Z' - torchscript_onnx_tflite: - inference_time: 441.0 - throughput: 2267.573696145125 + inference_time: 367.0 + throughput: 2724.7956403269754 estimated_peak_memory_range: - min: 12288 - max: 10853424 + min: 8192 + max: 11580288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j56yrzy6p + job_id: j5wel0965 job_status: Passed torchscript_onnx_qnn: - inference_time: 483.0 - throughput: 2070.3933747412007 + inference_time: 405.0 + throughput: 2469.135802469136 estimated_peak_memory_range: min: 0 - max: 10405952 + max: 10933488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgdx8qnrp + job_id: jp2krqm4p job_status: Passed torchscript_onnx: - inference_time: 872.0 - throughput: 1146.788990825688 + inference_time: 864.0 + throughput: 1157.4074074074074 estimated_peak_memory_range: min: 0 - max: 26037392 + max: 26264640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jgkeoqvwg + job_id: jgjvr6lxg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:53:29Z' + timestamp: '2024-12-11T23:19:27Z' - torchscript_onnx_tflite: - inference_time: 694.0 - throughput: 1440.922190201729 + inference_time: 697.0 + throughput: 1434.7202295552368 estimated_peak_memory_range: min: 24576 - max: 4318304 + 
max: 6537720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jp3jx1j3g + job_id: jg9lz74lg job_status: Passed torchscript_onnx_qnn: - inference_time: 758.0 - throughput: 1319.2612137203166 + inference_time: 762.0 + throughput: 1312.3359580052493 estimated_peak_memory_range: - min: 827392 - max: 2056216 + min: 819200 + max: 2082184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j57ykl2v5 + job_id: jpy1okd7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:53:17Z' + timestamp: '2024-12-11T23:19:16Z' - torchscript_onnx_tflite: - inference_time: 6211.0 - throughput: 161.00466913540492 + inference_time: 6284.0 + throughput: 159.13430935709738 estimated_peak_memory_range: - min: 278528 - max: 11319664 + min: 294912 + max: 14191088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jgo2on2qp + job_id: jp14nk82p job_status: Passed torchscript_onnx_qnn: - inference_time: 6499.0 - throughput: 153.86982612709647 + inference_time: 6477.0 + throughput: 154.3924656476764 estimated_peak_memory_range: - min: 737280 - max: 6396832 + min: 757760 + max: 11339312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jpxk36935 + job_id: jp8qed7xp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:53:20Z' + timestamp: '2024-12-11T23:19:18Z' - torchscript_onnx_tflite: - inference_time: 706.0 - throughput: 1416.4305949008499 + inference_time: 695.0 + 
throughput: 1438.8489208633093 estimated_peak_memory_range: - min: 20480 - max: 7072184 + min: 16384 + max: 73490784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jpv6erqk5 + job_id: jgdxdyvep job_status: Passed torchscript_onnx_qnn: - inference_time: 785.0 - throughput: 1273.8853503184714 + inference_time: 770.0 + throughput: 1298.7012987012988 estimated_peak_memory_range: - min: 819200 - max: 2159032 + min: 81920 + max: 1269248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j5mno6edp + job_id: jgke2wy2g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:53:21Z' + timestamp: '2024-12-11T23:19:19Z' - torchscript_onnx_tflite: - inference_time: 1280.0 - throughput: 781.25 + inference_time: 1297.0 + throughput: 771.0100231303007 estimated_peak_memory_range: - min: 12288 - max: 9273584 + min: 16384 + max: 9661392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jgjvo2dvg + job_id: j57ye1dl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1472.0 - throughput: 679.3478260869565 + inference_time: 1429.0 + throughput: 699.7900629811056 estimated_peak_memory_range: - min: 0 - max: 5838688 + min: 802816 + max: 6663968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgn6om0k5 + job_id: j5q6lx24p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:53:22Z' + timestamp: '2024-12-11T23:19:21Z' - torchscript_onnx_tflite: inference_time: 
703.0 throughput: 1422.475106685633 estimated_peak_memory_range: - min: 20480 - max: 71375880 + min: 12288 + max: 5052056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jped8woo5 + job_id: jp4ly6wv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 761.0 - throughput: 1314.060446780552 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 32768 - max: 1670352 + min: 843776 + max: 2344440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jprvo260g + job_id: jglvy9k85 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:53:24Z' + timestamp: '2024-12-11T23:19:22Z' - torchscript_onnx_tflite: - inference_time: 1197.0 - throughput: 835.421888053467 + inference_time: 1220.0 + throughput: 819.672131147541 estimated_peak_memory_range: min: 16384 - max: 8967744 + max: 13844624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jgz38j2o5 + job_id: jpxkl8115 job_status: Passed torchscript_onnx_qnn: - inference_time: 1426.0 - throughput: 701.2622720897616 + inference_time: 1452.0 + throughput: 688.7052341597796 estimated_peak_memory_range: min: 802816 - max: 6113680 + max: 6797168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp2k49xrp + job_id: j56y8910p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:53:25Z' + timestamp: '2024-12-11T23:19:23Z' - torchscript_onnx_tflite: inference_time: 935.0 
throughput: 1069.51871657754 estimated_peak_memory_range: - min: 16384 - max: 14823552 + min: 36864 + max: 14288320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j5we83w35 + job_id: j5mn01zwp job_status: Passed torchscript_onnx_qnn: - inference_time: 987.0 - throughput: 1013.1712259371834 + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: min: 802816 - max: 17071664 + max: 18601184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jpy1qjz8p + job_id: jp3jzlmlg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:53:26Z' + timestamp: '2024-12-11T23:19:24Z' - torchscript_onnx_qnn: - inference_time: 904.0 - throughput: 1106.1946902654868 + inference_time: 890.0 + throughput: 1123.5955056179776 estimated_peak_memory_range: min: 786432 max: 786432 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp4lmdn85 + job_id: jp0zm8r65 job_status: Passed torchscript_onnx: - inference_time: 5938.0 - throughput: 168.40687100033682 + inference_time: 1330.0 + throughput: 751.8796992481203 estimated_peak_memory_range: - min: 1982464 - max: 1982464 + min: 1970176 + max: 1970176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j5q6zr0np + job_id: jped70v15 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:53:30Z' + timestamp: '2024-12-11T23:19:28Z' diff --git a/qai_hub_models/models/midas/__init__.py b/qai_hub_models/models/midas/__init__.py index 
10b63ec3..d9025e1a 100644 --- a/qai_hub_models/models/midas/__init__.py +++ b/qai_hub_models/models/midas/__init__.py @@ -2,6 +2,9 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import MidasApp as App # noqa: F401 +from qai_hub_models.models._shared.depth_estimation.app import ( # noqa: F401 + DepthEstimationApp as App, +) + from .model import MODEL_ID # noqa: F401 from .model import Midas as Model # noqa: F401 diff --git a/qai_hub_models/models/midas/demo.py b/qai_hub_models/models/midas/demo.py index 956d0079..174f06ae 100644 --- a/qai_hub_models/models/midas/demo.py +++ b/qai_hub_models/models/midas/demo.py @@ -4,16 +4,9 @@ # --------------------------------------------------------------------- from __future__ import annotations -from qai_hub_models.models.midas.app import MidasApp +from qai_hub_models.models._shared.depth_estimation.demo import depth_estimation_demo from qai_hub_models.models.midas.model import MODEL_ASSET_VERSION, MODEL_ID, Midas -from qai_hub_models.utils.args import ( - demo_model_from_cli_args, - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.display import display_or_save_image +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset # Demo image comes from https://github.com/pytorch/hub/raw/master/images/dog.jpg INPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( @@ -21,39 +14,8 @@ ) -# Run Midas end-to-end on a sample image. -# The demo will display a heatmap of the estimated depth at each point in the image. 
-def midas_demo(model_cls: type[Midas], is_test: bool = False): - # Demo parameters - parser = get_model_cli_parser(model_cls) - parser = get_on_device_demo_parser(parser, add_output_dir=True) - parser.add_argument( - "--image", - type=str, - default=INPUT_IMAGE_ADDRESS, - help="image file path or URL", - ) - args = parser.parse_args([] if is_test else None) - model = demo_model_from_cli_args(model_cls, MODEL_ID, args) - validate_on_device_demo_args(args, MODEL_ID) - - # Load image - (_, _, height, width) = model_cls.get_input_spec()["image"][0] - image = load_image(args.image) - print("Model Loaded") - - app = MidasApp(model, height, width) - heatmap_image = app.estimate_depth(image) - - if not is_test: - # Resize / unpad annotated image - display_or_save_image( - heatmap_image, args.output_dir, "midas_heatmap.png", "heatmap" - ) - - def main(is_test: bool = False): - return midas_demo(model_cls=Midas, is_test=is_test) + depth_estimation_demo(Midas, MODEL_ID, INPUT_IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml index c08e5420..93cb0828 100644 --- a/qai_hub_models/models/midas/perf.yaml +++ b/qai_hub_models/models/midas/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Midas-V2 universal_assets: - torchscript_onnx_tflite: mq3e2d76m - torchscript_onnx: mnwezp1xn + torchscript_onnx_tflite: mnw8erk3n + torchscript_onnx: mnz1vpj6q performance_metrics: - torchscript_onnx_tflite: - inference_time: 3249.0 - throughput: 307.7870113881194 + inference_time: 3235.0 + throughput: 309.1190108191654 estimated_peak_memory_range: - min: 81920 - max: 19885576 + min: 16384 + max: 53503848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgn6omvk5 + job_id: jgo2l7ndp job_status: Passed torchscript_onnx_qnn: - inference_time: 3275.0 - throughput: 305.3435114503817 + inference_time: 3289.0 + 
throughput: 304.04378230465187 estimated_peak_memory_range: - min: 12288 - max: 115876984 + min: 16384 + max: 94819816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp3jx103g + job_id: jg9lz7wlg job_status: Passed torchscript_onnx: - inference_time: 3304.0 - throughput: 302.6634382566586 + inference_time: 3272.0 + throughput: 305.6234718826406 estimated_peak_memory_range: - min: 16384 - max: 42984392 + min: 802816 + max: 3091520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jp4lmdl85 + job_id: jp0zm8l65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:52:42Z' + timestamp: '2024-12-11T23:18:42Z' - torchscript_onnx_tflite: - inference_time: 2269.0 - throughput: 440.72278536800354 + inference_time: 2279.0 + throughput: 438.7889425186485 estimated_peak_memory_range: - min: 16384 - max: 28640192 + min: 32768 + max: 29900784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jprvo230g + job_id: jpv6lyrm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2308.0 - throughput: 433.27556325823224 + inference_time: 2317.0 + throughput: 431.59257660768236 estimated_peak_memory_range: - min: 802816 - max: 26175344 + min: 0 + max: 29768224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgo2on6qp + job_id: jp14nke2p job_status: Passed torchscript_onnx: - inference_time: 2539.0 - throughput: 393.8558487593541 + inference_time: 2389.0 + throughput: 418.5851820845542 estimated_peak_memory_range: min: 0 - max: 97196688 + max: 100714960 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jpxk36k35 + job_id: jp8qedzxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:52:43Z' + timestamp: '2024-12-11T23:18:43Z' - torchscript_onnx_tflite: - inference_time: 1780.0 - throughput: 561.7977528089888 + inference_time: 2103.0 + throughput: 475.51117451260103 estimated_peak_memory_range: min: 12288 - max: 23903728 + max: 26158368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp2k49yrp + job_id: jgjvr628g job_status: Passed torchscript_onnx_qnn: - inference_time: 2170.0 - throughput: 460.8294930875576 + inference_time: 2114.0 + throughput: 473.0368968779565 estimated_peak_memory_range: min: 0 - max: 23387456 + max: 24943120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jpv6er6k5 + job_id: jgdxdyoep job_status: Passed torchscript_onnx: - inference_time: 2226.0 - throughput: 449.23629829290206 + inference_time: 2218.0 + throughput: 450.8566275924256 estimated_peak_memory_range: min: 0 - max: 45599664 + max: 48290336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: j5mno6ndp + job_id: jgke2w32g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:52:44Z' + timestamp: '2024-12-11T23:18:44Z' - torchscript_onnx_tflite: - inference_time: 3213.0 - throughput: 311.2356053532524 + inference_time: 3211.0 + throughput: 311.42946122703205 
estimated_peak_memory_range: min: 16384 - max: 53410104 + max: 54112432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jpy1qj38p + job_id: jped70w05 job_status: Passed torchscript_onnx_qnn: - inference_time: 3027.0 - throughput: 330.3600925008259 + inference_time: 3030.0 + throughput: 330.03300330033005 estimated_peak_memory_range: - min: 819200 - max: 2093280 + min: 860160 + max: 2122176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgjvo2vvg + job_id: j57ye1xl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:52:33Z' + timestamp: '2024-12-11T23:18:33Z' - torchscript_onnx_tflite: - inference_time: 84525.0 - throughput: 11.830819284235433 + inference_time: 84508.0 + throughput: 11.833199223742131 estimated_peak_memory_range: - min: 516096 - max: 23006368 + min: 356352 + max: 24631648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp0zd2095 + job_id: jgz3lqj65 job_status: Passed torchscript_onnx_qnn: - inference_time: 84275.0 - throughput: 11.865915158706615 + inference_time: 84298.0 + throughput: 11.862677643597713 estimated_peak_memory_range: - min: 888832 - max: 6747280 + min: 884736 + max: 11290544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgz38j3o5 + job_id: jpxkl8y15 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:52:35Z' + timestamp: '2024-12-11T23:18:35Z' - torchscript_onnx_tflite: - inference_time: 3225.0 - 
throughput: 310.07751937984494 + inference_time: 3240.0 + throughput: 308.641975308642 estimated_peak_memory_range: min: 16384 - max: 42360680 + max: 20201096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jp8q6mykp + job_id: j5wel03j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3043.0 - throughput: 328.62306933946763 + inference_time: 3034.0 + throughput: 329.5978905735003 estimated_peak_memory_range: - min: 36864 - max: 1272496 + min: 155648 + max: 1244008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j5we83e35 + job_id: j5mn013wp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:52:37Z' + timestamp: '2024-12-11T23:18:36Z' - torchscript_onnx_tflite: - inference_time: 5590.0 - throughput: 178.89087656529517 + inference_time: 5623.0 + throughput: 177.84101013693757 estimated_peak_memory_range: - min: 299008 - max: 21676400 + min: 294912 + max: 24370432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jgkeoqxwg + job_id: jg9lz7yvg job_status: Passed torchscript_onnx_qnn: - inference_time: 5669.0 - throughput: 176.3979537837361 + inference_time: 5472.0 + throughput: 182.7485380116959 estimated_peak_memory_range: - min: 835584 - max: 6810992 + min: 802816 + max: 6786000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jg9lkylwg + job_id: jgn6zd3r5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:52:38Z' + timestamp: 
'2024-12-11T23:18:37Z' - torchscript_onnx_tflite: - inference_time: 3245.0 - throughput: 308.1664098613251 + inference_time: 3216.0 + throughput: 310.9452736318408 estimated_peak_memory_range: - min: 20480 - max: 22018632 + min: 16384 + max: 53218448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j5q6zrqnp + job_id: jp14nkwlp job_status: Passed torchscript_onnx_qnn: inference_time: 3034.0 throughput: 329.5978905735003 estimated_peak_memory_range: - min: 53248 - max: 1539344 + min: 827392 + max: 1964528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jp147w48p + job_id: jprvlme9g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:52:39Z' + timestamp: '2024-12-11T23:18:38Z' - torchscript_onnx_tflite: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 5439.0 + throughput: 183.85732671446956 estimated_peak_memory_range: min: 16384 - max: 22365296 + max: 23982384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jglvo2mj5 + job_id: jgdxdyqlp job_status: Passed torchscript_onnx_qnn: - inference_time: 5233.0 - throughput: 191.0949742021785 + inference_time: 5283.0 + throughput: 189.28639030853682 estimated_peak_memory_range: min: 802816 - max: 6364384 + max: 6917616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jgdx8qxrp + job_id: jp2krql4p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:52:40Z' + timestamp: 
'2024-12-11T23:18:39Z' - torchscript_onnx_tflite: - inference_time: 4787.0 - throughput: 208.89910173386255 + inference_time: 4766.0 + throughput: 209.819555182543 estimated_peak_memory_range: - min: 0 - max: 25063856 + min: 16384 + max: 27520144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j56yrz46p + job_id: j5wel0q65 job_status: Passed torchscript_onnx_qnn: - inference_time: 4906.0 - throughput: 203.8320423970648 + inference_time: 4909.0 + throughput: 203.70747606437158 estimated_peak_memory_range: min: 802816 - max: 23992496 + max: 27336080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j57yklyv5 + job_id: jpy1ok67p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:52:41Z' + timestamp: '2024-12-11T23:18:41Z' - torchscript_onnx_qnn: - inference_time: 3230.0 - throughput: 309.59752321981426 + inference_time: 3189.0 + throughput: 313.5779241141424 estimated_peak_memory_range: min: 786432 max: 786432 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jped8wdo5 + job_id: jp4ly6vv5 job_status: Passed torchscript_onnx: - inference_time: 3344.0 - throughput: 299.0430622009569 + inference_time: 3347.0 + throughput: 298.7750224081267 estimated_peak_memory_range: - min: 38699008 - max: 38699008 + min: 39383040 + max: 39383040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 199 - job_id: jgn6om6k5 + job_id: j5q6lx34p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:52:45Z' + 
timestamp: '2024-12-11T23:18:45Z' diff --git a/qai_hub_models/models/midas/test.py b/qai_hub_models/models/midas/test.py index 11377780..898b2f46 100644 --- a/qai_hub_models/models/midas/test.py +++ b/qai_hub_models/models/midas/test.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from qai_hub_models.models.midas.app import MidasApp +from qai_hub_models.models._shared.depth_estimation.app import DepthEstimationApp from qai_hub_models.models.midas.demo import INPUT_IMAGE_ADDRESS from qai_hub_models.models.midas.demo import main as demo_main from qai_hub_models.models.midas.model import MODEL_ASSET_VERSION, MODEL_ID, Midas @@ -21,7 +21,7 @@ @skip_clone_repo_check def test_task(): (_, _, height, width) = Midas.get_input_spec()["image"][0] - app = MidasApp(Midas.from_pretrained(), height, width) + app = DepthEstimationApp(Midas.from_pretrained(), height, width) original_image = load_image(INPUT_IMAGE_ADDRESS) output_image = app.estimate_depth(original_image) output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) @@ -36,7 +36,7 @@ def test_task(): def test_trace(): (_, _, height, width) = Midas.get_input_spec()["image"][0] traced_model = Midas.from_pretrained().convert_to_torchscript(check_trace=False) - app = MidasApp(traced_model, height, width) + app = DepthEstimationApp(traced_model, height, width) original_image = load_image(INPUT_IMAGE_ADDRESS) output_image = app.estimate_depth(original_image) output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) diff --git a/qai_hub_models/models/midas_quantized/__init__.py b/qai_hub_models/models/midas_quantized/__init__.py index b9d6a15c..a5ebb881 100644 --- a/qai_hub_models/models/midas_quantized/__init__.py +++ b/qai_hub_models/models/midas_quantized/__init__.py @@ -2,7 +2,9 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models.midas.app import MidasApp as App # noqa: F401 +from qai_hub_models.models._shared.depth_estimation.app import ( # noqa: F401 + DepthEstimationApp as App, +) from .model import MODEL_ID # noqa: F401 from .model import MidasQuantizable as Model # noqa: F401 diff --git a/qai_hub_models/models/midas_quantized/demo.py b/qai_hub_models/models/midas_quantized/demo.py index 0c152370..34011603 100644 --- a/qai_hub_models/models/midas_quantized/demo.py +++ b/qai_hub_models/models/midas_quantized/demo.py @@ -2,12 +2,13 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models.midas.demo import midas_demo -from qai_hub_models.models.midas_quantized.model import MidasQuantizable +from qai_hub_models.models._shared.depth_estimation.demo import depth_estimation_demo +from qai_hub_models.models.midas.demo import INPUT_IMAGE_ADDRESS +from qai_hub_models.models.midas_quantized.model import MODEL_ID, MidasQuantizable def main(is_test: bool = False): - midas_demo(MidasQuantizable, is_test) + depth_estimation_demo(MidasQuantizable, MODEL_ID, INPUT_IMAGE_ADDRESS, is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/midas_quantized/export.py b/qai_hub_models/models/midas_quantized/export.py index a17c0064..95d61249 100644 --- a/qai_hub_models/models/midas_quantized/export.py +++ b/qai_hub_models/models/midas_quantized/export.py @@ -207,7 +207,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_onnx=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/midas_quantized/info.yaml b/qai_hub_models/models/midas_quantized/info.yaml 
index 1279b050..414cc2b1 100644 --- a/qai_hub_models/models/midas_quantized/info.yaml +++ b/qai_hub_models/models/midas_quantized/info.yaml @@ -8,6 +8,7 @@ use_case: Depth Estimation description: Midas is designed for estimating depth at each point in an image. tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1907.01341v3 research_paper_title: 'Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer' diff --git a/qai_hub_models/models/midas_quantized/perf.yaml b/qai_hub_models/models/midas_quantized/perf.yaml index 57be328b..1a0cf45a 100644 --- a/qai_hub_models/models/midas_quantized/perf.yaml +++ b/qai_hub_models/models/midas_quantized/perf.yaml @@ -47,14 +47,15 @@ aggregated: models: - name: Midas-V2-Quantized universal_assets: - torchscript_onnx_tflite: mqex93pvn + torchscript_onnx_tflite: mn0jxedwm + torchscript_onnx: mqv6425lm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1092.0 - throughput: 915.7509157509157 + inference_time: 1101.0 + throughput: 908.2652134423251 estimated_peak_memory_range: min: 20480 - max: 7868608 + max: 60421008 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5we82n35 + job_id: jp2krq0mp job_status: Passed torchscript_onnx_qnn: - inference_time: 1434.0 - throughput: 697.350069735007 + inference_time: 1436.0 + throughput: 696.3788300835655 estimated_peak_memory_range: - min: 16384 - max: 9934136 + min: 32768 + max: 65743344 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +78,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jp8q60vkp + job_id: jgz3lqo65 + job_status: Passed + torchscript_onnx: + inference_time: 8722.0 + throughput: 114.65260261407934 + estimated_peak_memory_range: + min: 1044480 + max: 5320520 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 176 + layers_on_gpu: 0 + 
layers_on_cpu: 5 + total_layers: 181 + job_id: jpy1okj4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +102,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:51:43Z' + timestamp: '2024-12-11T23:17:55Z' - torchscript_onnx_tflite: - inference_time: 759.0 - throughput: 1317.5230566534915 + inference_time: 766.0 + throughput: 1305.4830287206266 estimated_peak_memory_range: - min: 20480 - max: 27676512 + min: 12288 + max: 28364080 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jg9lkjewg + job_id: jp0zm83e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 1006.0 + throughput: 994.0357852882704 estimated_peak_memory_range: min: 208896 - max: 22858128 + max: 28474880 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +131,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgkeo7mwg + job_id: j5wel02j5 + job_status: Passed + torchscript_onnx: + inference_time: 7116.0 + throughput: 140.5283867341203 + estimated_peak_memory_range: + min: 835584 + max: 293447536 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 176 + layers_on_gpu: 0 + layers_on_cpu: 5 + total_layers: 181 + job_id: jp0zm82e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +155,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:51:44Z' + timestamp: '2024-12-11T23:17:56Z' - torchscript_onnx_tflite: - inference_time: 715.0 - throughput: 1398.6013986013986 + inference_time: 712.0 + throughput: 1404.4943820224719 estimated_peak_memory_range: - min: 8192 - max: 24615488 + min: 12288 + max: 26540320 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +169,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 145 - job_id: jp147yx8p + job_id: jp8qed08p job_status: Passed torchscript_onnx_qnn: - inference_time: 847.0 - throughput: 1180.637544273908 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 0 - max: 21634624 + min: 208896 + max: 23375248 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +184,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j5q6zeonp + job_id: jg9lz7jvg + job_status: Passed + torchscript_onnx: + inference_time: 7852.0 + throughput: 127.35608762098828 + estimated_peak_memory_range: + min: 897024 + max: 219057568 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 176 + layers_on_gpu: 0 + layers_on_cpu: 5 + total_layers: 181 + job_id: jp8qedm8p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +208,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:51:45Z' + timestamp: '2024-12-11T23:17:57Z' - torchscript_onnx_tflite: - inference_time: 3714.0 - throughput: 269.25148088314484 + inference_time: 3770.0 + throughput: 265.2519893899204 estimated_peak_memory_range: min: 12288 - max: 27598560 + max: 29773488 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +222,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgdx8elrp + job_id: jgke2w7og job_status: Passed torchscript_onnx_qnn: - inference_time: 5880.0 - throughput: 170.06802721088437 + inference_time: 5868.0 + throughput: 170.41581458759373 estimated_peak_memory_range: - min: 237568 - max: 8484304 + min: 212992 + max: 7802848 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +237,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jglvo6rj5 + job_id: jp14nkylp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,13 +246,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy 
- timestamp: '2024-11-26T01:51:46Z' + timestamp: '2024-12-11T23:17:45Z' - torchscript_onnx_tflite: - inference_time: 15566.0 - throughput: 64.24257998201207 + inference_time: 15397.0 + throughput: 64.94771708774437 estimated_peak_memory_range: min: 94208 - max: 5622400 + max: 2748536 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +260,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j57yk03v5 + job_id: j5q6lxemp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -223,13 +269,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T01:51:34Z' + timestamp: '2024-12-11T23:17:34Z' - torchscript_onnx_tflite: - inference_time: 1082.0 - throughput: 924.2144177449168 + inference_time: 1083.0 + throughput: 923.3610341643582 estimated_peak_memory_range: - min: 12288 - max: 7927304 + min: 20480 + max: 61662064 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,14 +283,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp4lmk085 + job_id: jglvy96l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1328.0 - throughput: 753.0120481927711 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 249856 - max: 1590528 + min: 221184 + max: 1388768 primary_compute_unit: NPU precision: int8 layer_info: @@ -252,7 +298,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j56yrel6p + job_id: jgdxdyelp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -261,13 +307,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:51:47Z' + timestamp: '2024-12-11T23:17:46Z' - torchscript_onnx_tflite: - inference_time: 11115.0 - throughput: 89.9685110211426 + inference_time: 11089.0 + throughput: 90.17945711966814 estimated_peak_memory_range: - min: 16384 - max: 22737872 + min: 45056 + max: 24637760 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -275,14 +321,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpxk3n235 + job_id: j56y89e7p job_status: Passed torchscript_onnx_qnn: - inference_time: 12256.0 - throughput: 81.59268929503916 + inference_time: 12231.0 + throughput: 81.7594636579184 estimated_peak_memory_range: - min: 188416 - max: 6133728 + min: 167936 + max: 10486592 primary_compute_unit: NPU precision: int8 layer_info: @@ -290,7 +336,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgo2okqqp + job_id: jp4ly6dl5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -299,13 +345,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:51:49Z' + timestamp: '2024-12-11T23:17:48Z' - torchscript_onnx_tflite: - inference_time: 1096.0 - throughput: 912.4087591240876 + inference_time: 1102.0 + throughput: 907.4410163339383 estimated_peak_memory_range: min: 16384 - max: 12121632 + max: 61726600 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,14 +359,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5mnoqydp + job_id: jp3jzlvzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1327.0 - throughput: 753.5795026375282 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: min: 229376 - max: 1421856 + max: 1461096 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +374,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jpv6erkk5 + job_id: jpxkl8695 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -337,13 +383,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:51:51Z' + timestamp: '2024-12-11T23:17:49Z' - torchscript_onnx_tflite: - inference_time: 1928.0 - throughput: 518.6721991701245 + inference_time: 1943.0 + throughput: 514.668039114771 estimated_peak_memory_range: - min: 16384 - max: 21630480 + min: 0 
+ max: 23737360 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,14 +397,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jprvo8j0g + job_id: jgo2l7kdp job_status: Passed torchscript_onnx_qnn: - inference_time: 2931.0 - throughput: 341.180484476288 + inference_time: 2430.0 + throughput: 411.52263374485597 estimated_peak_memory_range: min: 0 - max: 5719664 + max: 5928800 primary_compute_unit: NPU precision: int8 layer_info: @@ -366,7 +412,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgjvo2nvg + job_id: j5mn016qp job_status: Passed reference_device_info: name: SA8295P ADP @@ -375,13 +421,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:51:52Z' + timestamp: '2024-12-11T23:17:51Z' - torchscript_onnx_tflite: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 1108.0 + throughput: 902.5270758122743 estimated_peak_memory_range: - min: 16384 - max: 51946048 + min: 20480 + max: 62801872 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,14 +435,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp2k40nrp + job_id: jpv6ly0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1327.0 + throughput: 753.5795026375282 estimated_peak_memory_range: min: 229376 - max: 1882136 + max: 1577104 primary_compute_unit: NPU precision: int8 layer_info: @@ -404,7 +450,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jped8wmo5 + job_id: jgn6zdmm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -413,13 +459,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:51:53Z' + timestamp: '2024-12-11T23:17:52Z' - torchscript_onnx_tflite: - inference_time: 1596.0 - throughput: 626.5664160401003 + inference_time: 1605.0 + throughput: 623.0529595015577 
estimated_peak_memory_range: - min: 12288 - max: 23631312 + min: 16384 + max: 26826848 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,14 +473,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpy1qr08p + job_id: jgjvr6z8g job_status: Passed torchscript_onnx_qnn: - inference_time: 2108.0 - throughput: 474.3833017077799 + inference_time: 2096.0 + throughput: 477.09923664122135 estimated_peak_memory_range: - min: 0 - max: 5681792 + min: 212992 + max: 6279280 primary_compute_unit: NPU precision: int8 layer_info: @@ -442,7 +488,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jgz38jdo5 + job_id: jprvlm2eg job_status: Passed reference_device_info: name: SA8775P ADP @@ -451,13 +497,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:51:54Z' + timestamp: '2024-12-11T23:17:53Z' - torchscript_onnx_tflite: - inference_time: 1443.0 - throughput: 693.000693000693 + inference_time: 1445.0 + throughput: 692.0415224913495 estimated_peak_memory_range: min: 81920 - max: 31067168 + max: 32346800 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,14 +511,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp0zd3795 + job_id: jped70e05 job_status: Passed torchscript_onnx_qnn: - inference_time: 1770.0 - throughput: 564.9717514124294 + inference_time: 1816.0 + throughput: 550.6607929515418 estimated_peak_memory_range: min: 212992 - max: 29243440 + max: 29887104 primary_compute_unit: NPU precision: int8 layer_info: @@ -480,7 +526,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: j5we83635 + job_id: jp2krq9mp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -489,13 +535,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:51:55Z' + timestamp: '2024-12-11T23:17:54Z' - torchscript_onnx_qnn: - inference_time: 1472.0 - throughput: 679.3478260869565 + 
inference_time: 1471.0 + throughput: 679.8096532970768 estimated_peak_memory_range: - min: 348160 - max: 348160 + min: 339968 + max: 339968 primary_compute_unit: NPU precision: int8 layer_info: @@ -503,7 +549,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 203 - job_id: jp3jxv23g + job_id: j57ye1lr5 + job_status: Passed + torchscript_onnx: + inference_time: 8848.0 + throughput: 113.01989150090417 + estimated_peak_memory_range: + min: 25239552 + max: 25239552 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 176 + layers_on_gpu: 0 + layers_on_cpu: 5 + total_layers: 181 + job_id: jgke2wqog job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -512,4 +573,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:51:48Z' + timestamp: '2024-12-11T23:17:58Z' diff --git a/qai_hub_models/models/midas_quantized/test.py b/qai_hub_models/models/midas_quantized/test.py index 032a129e..a8a2aea0 100644 --- a/qai_hub_models/models/midas_quantized/test.py +++ b/qai_hub_models/models/midas_quantized/test.py @@ -4,7 +4,7 @@ # --------------------------------------------------------------------- import numpy as np -from qai_hub_models.models.midas.app import MidasApp +from qai_hub_models.models._shared.depth_estimation.app import DepthEstimationApp from qai_hub_models.models.midas.demo import INPUT_IMAGE_ADDRESS from qai_hub_models.models.midas_quantized.demo import main as demo_main from qai_hub_models.models.midas_quantized.model import ( @@ -24,7 +24,7 @@ @skip_clone_repo_check def test_task(): (_, _, height, width) = MidasQuantizable.get_input_spec()["image"][0] - app = MidasApp(MidasQuantizable.from_pretrained(), height, width) + app = DepthEstimationApp(MidasQuantizable.from_pretrained(), height, width) original_image = load_image(INPUT_IMAGE_ADDRESS) output_image = app.estimate_depth(original_image) output_image_oracle = load_image(OUTPUT_IMAGE_ADDRESS) diff --git 
a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml index baa2cf02..9128b5d0 100644 --- a/qai_hub_models/models/mnasnet05/perf.yaml +++ b/qai_hub_models/models/mnasnet05/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MNASNet05 universal_assets: - torchscript_onnx_tflite: mq8k1vjgq - torchscript_onnx: mqy3d4k7m + torchscript_onnx_tflite: mm5edg49m + torchscript_onnx: mq2143xdm performance_metrics: - torchscript_onnx_tflite: - inference_time: 759.0 - throughput: 1317.5230566534915 + inference_time: 760.0 + throughput: 1315.7894736842106 estimated_peak_memory_range: - min: 28672 - max: 43542888 + min: 126976 + max: 183970752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jped8e915 + job_id: jgdxdyxlp job_status: Passed torchscript_onnx_qnn: - inference_time: 824.0 - throughput: 1213.5922330097087 + inference_time: 818.0 + throughput: 1222.4938875305625 estimated_peak_memory_range: - min: 12288 - max: 172224888 + min: 1359872 + max: 33953168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgn6olyr5 + job_id: jp8qed28p job_status: Passed torchscript_onnx: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 743.0 + throughput: 1345.8950201884254 estimated_peak_memory_range: - min: 360448 - max: 2092240 + min: 20480 + max: 158303112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgo2ok0xp + job_id: j5wel0wj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:50:39Z' + timestamp: '2024-12-11T23:16:39Z' - torchscript_onnx_tflite: - inference_time: 510.0 - throughput: 1960.7843137254902 
+ inference_time: 511.0 + throughput: 1956.9471624266146 estimated_peak_memory_range: min: 16384 - max: 10767664 + max: 21436368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgz38oek5 + job_id: j57ye12r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 561.0 + throughput: 1782.5311942959001 estimated_peak_memory_range: - min: 0 - max: 13921024 + min: 618496 + max: 18483632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jprvo8q9g + job_id: jgke2wvog job_status: Passed torchscript_onnx: - inference_time: 555.0 - throughput: 1801.8018018018017 + inference_time: 541.0 + throughput: 1848.4288354898335 estimated_peak_memory_range: min: 0 - max: 55636320 + max: 56079936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jpv6e0oj5 + job_id: jg9lz70vg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:50:40Z' + timestamp: '2024-12-11T23:16:40Z' - torchscript_onnx_tflite: - inference_time: 512.0 - throughput: 1953.125 + inference_time: 417.0 + throughput: 2398.0815347721823 estimated_peak_memory_range: - min: 12288 - max: 11052400 + min: 8192 + max: 15135056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j5we82v65 + job_id: jp4ly6nl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 555.0 - throughput: 1801.8018018018017 + inference_time: 582.0 + throughput: 1718.213058419244 estimated_peak_memory_range: min: 0 - max: 12129008 + max: 15612832 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp2k4064p + job_id: j5q6lx0mp job_status: Passed torchscript_onnx: - inference_time: 564.0 - throughput: 1773.049645390071 + inference_time: 566.0 + throughput: 1766.7844522968198 estimated_peak_memory_range: min: 0 - max: 24232448 + max: 23650976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgjvozmxg + job_id: jp14nk2lp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:50:41Z' + timestamp: '2024-12-11T23:16:41Z' - torchscript_onnx_tflite: - inference_time: 757.0 - throughput: 1321.003963011889 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: - min: 20480 - max: 58188776 + min: 16384 + max: 32988912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jg9lkj1lg + job_id: jpxkl8995 job_status: Passed torchscript_onnx_qnn: - inference_time: 760.0 - throughput: 1315.7894736842106 + inference_time: 767.0 + throughput: 1303.7809647979138 estimated_peak_memory_range: - min: 626688 - max: 1949104 + min: 638976 + max: 2220792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpy1qrw7p + job_id: jglvy94l5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:50:29Z' + timestamp: '2024-12-11T23:16:30Z' - torchscript_onnx_tflite: - inference_time: 10829.0 - throughput: 92.3446301597562 + inference_time: 10855.0 + throughput: 92.12344541685859 estimated_peak_memory_range: min: 
20480 - max: 12789840 + max: 14857904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp147yl2p + job_id: j5mn01eqp job_status: Passed torchscript_onnx_qnn: - inference_time: 11045.0 - throughput: 90.53870529651427 + inference_time: 11087.0 + throughput: 90.19572472264815 estimated_peak_memory_range: - min: 569344 - max: 6296000 + min: 565248 + max: 11041648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp8q609xp + job_id: jp3jzlnzg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:50:31Z' + timestamp: '2024-12-11T23:16:33Z' - torchscript_onnx_tflite: inference_time: 759.0 throughput: 1317.5230566534915 estimated_peak_memory_range: - min: 16384 - max: 33212960 + min: 24576 + max: 4663232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgdx8e9ep + job_id: jgn6zd0m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 767.0 - throughput: 1303.7809647979138 + inference_time: 765.0 + throughput: 1307.18954248366 estimated_peak_memory_range: - min: 626688 - max: 1852560 + min: 655360 + max: 2018768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgkeo7n2g + job_id: jgo2l7zdp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:50:32Z' + timestamp: '2024-12-11T23:16:34Z' - torchscript_onnx_tflite: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1434.0 + throughput: 697.350069735007 
estimated_peak_memory_range: - min: 40960 - max: 10471184 + min: 16384 + max: 10740112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j57yk0wl5 + job_id: jprvlm6eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1640.0 - throughput: 609.7560975609756 + inference_time: 1541.0 + throughput: 648.9292667099286 estimated_peak_memory_range: min: 0 - max: 5832816 + max: 6021904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5q6zek4p + job_id: jpv6lyqm5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:50:34Z' + timestamp: '2024-12-11T23:16:35Z' - torchscript_onnx_tflite: - inference_time: 760.0 - throughput: 1315.7894736842106 + inference_time: 754.0 + throughput: 1326.2599469496022 estimated_peak_memory_range: - min: 16384 - max: 114798056 + min: 24576 + max: 33026720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp4lmkov5 + job_id: jp2krqxmp job_status: Passed torchscript_onnx_qnn: - inference_time: 764.0 - throughput: 1308.9005235602094 + inference_time: 762.0 + throughput: 1312.3359580052493 estimated_peak_memory_range: min: 634880 - max: 1880520 + max: 1913352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jglvo6z85 + job_id: jgjvr6d8g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:50:35Z' + timestamp: '2024-12-11T23:16:36Z' - torchscript_onnx_tflite: - inference_time: 1351.0 - throughput: 740.1924500370096 + 
inference_time: 1333.0 + throughput: 750.1875468867216 estimated_peak_memory_range: - min: 16384 - max: 10627408 + min: 0 + max: 15793072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpxk3nj15 + job_id: jpy1okz4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1504.0 - throughput: 664.8936170212766 + inference_time: 1509.0 + throughput: 662.6905235255136 estimated_peak_memory_range: - min: 630784 - max: 6479536 + min: 618496 + max: 6772256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j56yrej0p + job_id: jped70o05 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:50:36Z' + timestamp: '2024-12-11T23:16:37Z' - torchscript_onnx_tflite: - inference_time: 1039.0 - throughput: 962.4639076034649 + inference_time: 1043.0 + throughput: 958.7727708533077 estimated_peak_memory_range: - min: 20480 - max: 16562544 + min: 16384 + max: 22676912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j5mnoq2wp + job_id: jp0zm84e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1107.0 - throughput: 903.342366757001 + inference_time: 1104.0 + throughput: 905.7971014492754 estimated_peak_memory_range: - min: 618496 - max: 16131536 + min: 622592 + max: 17929488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp3jxv3lg + job_id: jgz3lq265 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:50:37Z' + timestamp: '2024-12-11T23:16:38Z' - 
torchscript_onnx_qnn: - inference_time: 902.0 - throughput: 1108.6474501108648 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp0zd3q65 + job_id: j56y8927p + job_status: Passed + torchscript_onnx: + inference_time: 823.0 + throughput: 1215.0668286755772 + estimated_peak_memory_range: + min: 7225344 + max: 7225344 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 104 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 104 + job_id: jgdxdynlp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:50:42Z' + timestamp: '2024-12-11T23:16:42Z' diff --git a/qai_hub_models/models/mobile_vit/perf.yaml b/qai_hub_models/models/mobile_vit/perf.yaml index a28e74bb..b93001d4 100644 --- a/qai_hub_models/models/mobile_vit/perf.yaml +++ b/qai_hub_models/models/mobile_vit/perf.yaml @@ -1,7 +1,512 @@ aggregated: - supported_devices: [] - supported_chipsets: [] + supported_devices: + - Snapdragon 8 Elite QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - SA8775P ADP + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA7255P ADP + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + 
- SA8775P + - QCS8450 Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy models: - name: Mobile_Vit - universal_assets: {} - performance_metrics: [] + universal_assets: + torchscript_onnx_tflite: mqv642vlm + torchscript_onnx: mmr365exm + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 5200.0 + throughput: 192.30769230769232 + estimated_peak_memory_range: + min: 16384 + max: 18460424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: j5wel06z5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 5061.0 + throughput: 197.58940920766648 + estimated_peak_memory_range: + min: 40960 + max: 12997800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jpxkl8k95 + job_status: Passed + torchscript_onnx: + inference_time: 6022.0 + throughput: 166.05778811026238 + estimated_peak_memory_range: + min: 57344 + max: 16725880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 437 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 437 + job_id: j56y89y7p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T23:15:54Z' + - torchscript_onnx_tflite: + inference_time: 3588.0 + throughput: 278.70680044593087 + estimated_peak_memory_range: + min: 12288 + max: 37503856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jg9lz7nqg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3518.0 + throughput: 284.2524161455372 + estimated_peak_memory_range: + min: 802816 + max: 40781808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 
472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: j5mn01nqp + job_status: Passed + torchscript_onnx: + inference_time: 4232.0 + throughput: 236.29489603024575 + estimated_peak_memory_range: + min: 815104 + max: 120619872 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 437 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 437 + job_id: jp3jzljzg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T23:15:56Z' + - torchscript_onnx_tflite: + inference_time: 2892.0 + throughput: 345.78146611341634 + estimated_peak_memory_range: + min: 12288 + max: 34074240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jp14nkzkp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3514.0 + throughput: 284.57598178713715 + estimated_peak_memory_range: + min: 0 + max: 38818320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jgn6zd6m5 + job_status: Passed + torchscript_onnx: + inference_time: 3288.0 + throughput: 304.1362530413625 + estimated_peak_memory_range: + min: 0 + max: 65853696 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 437 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 437 + job_id: jgo2l72dp + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T23:15:57Z' + - torchscript_onnx_tflite: + inference_time: 5194.0 + throughput: 192.52984212552946 + estimated_peak_memory_range: + min: 24576 + max: 17995040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 
577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jgdxdy1kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4613.0 + throughput: 216.77866897897246 + estimated_peak_memory_range: + min: 835584 + max: 2074304 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jprvlmveg + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T23:15:46Z' + - torchscript_onnx_tflite: + inference_time: 43281.0 + throughput: 23.10482659827638 + estimated_peak_memory_range: + min: 57344 + max: 31292336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: j5wel06j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 43413.0 + throughput: 23.034574896920276 + estimated_peak_memory_range: + min: 819200 + max: 11275536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jpy1ok14p + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T23:15:48Z' + - torchscript_onnx_tflite: + inference_time: 5213.0 + throughput: 191.8281220026856 + estimated_peak_memory_range: + min: 16384 + max: 19224048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jg9lz7nvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4649.0 + throughput: 215.10002151000216 + estimated_peak_memory_range: + min: 823296 + max: 1921032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jp0zm8ze5 + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T23:15:49Z' + - torchscript_onnx_tflite: + inference_time: 8342.0 + throughput: 119.87532965715656 + estimated_peak_memory_range: + min: 36864 + max: 31623216 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jp14nkzlp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8407.0 + throughput: 118.94849530153444 + estimated_peak_memory_range: + min: 802816 + max: 7010320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jp8qedq8p + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T23:15:50Z' + - torchscript_onnx_tflite: + inference_time: 5203.0 + throughput: 192.19680953296177 + estimated_peak_memory_range: + min: 16384 + max: 18281568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jgdxdy1lp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4636.0 + throughput: 215.70319240724763 + estimated_peak_memory_range: + min: 860160 + max: 2103632 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jgke2weog + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T23:15:51Z' + - torchscript_onnx_tflite: + inference_time: 7393.0 + 
throughput: 135.26308670363858 + estimated_peak_memory_range: + min: 20480 + max: 30544128 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: j57ye1yr5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7716.0 + throughput: 129.60082944530845 + estimated_peak_memory_range: + min: 802816 + max: 6623056 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: j5q6lx6mp + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T23:15:52Z' + - torchscript_onnx_tflite: + inference_time: 7175.0 + throughput: 139.37282229965157 + estimated_peak_memory_range: + min: 24576 + max: 39411296 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 577 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 577 + job_id: jp4ly6ll5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7098.0 + throughput: 140.88475626937165 + estimated_peak_memory_range: + min: 827392 + max: 42068432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jglvy9vl5 + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T23:15:53Z' + - torchscript_onnx_qnn: + inference_time: 5043.0 + throughput: 198.29466587348801 + estimated_peak_memory_range: + min: 786432 + max: 786432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 472 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 472 + job_id: jp2krqkmp + job_status: Passed + torchscript_onnx: + inference_time: 6442.0 + throughput: 
155.2312946289972 + estimated_peak_memory_range: + min: 15552512 + max: 15552512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 437 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 437 + job_id: jpv6ly6m5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T23:15:58Z' diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index d48e4f97..e4c80992 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MobileNet-v2 universal_assets: - torchscript_onnx_tflite: mq9pde40n - torchscript_onnx: mnlvk1ejm + torchscript_onnx_tflite: mqkvk2vkm + torchscript_onnx: mnw8erepn performance_metrics: - torchscript_onnx_tflite: - inference_time: 880.0 - throughput: 1136.3636363636363 + inference_time: 876.0 + throughput: 1141.552511415525 estimated_peak_memory_range: - min: 53248 - max: 29959120 + min: 20480 + max: 60073424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgkeo7j2g + job_id: jp3jzr2xg job_status: Passed torchscript_onnx_qnn: - inference_time: 1089.0 - throughput: 918.2736455463728 + inference_time: 1087.0 + throughput: 919.9632014719411 estimated_peak_memory_range: - min: 12288 - max: 208254136 + min: 618496 + max: 52925840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: j5we82o65 + job_id: j57ye83q5 job_status: Passed torchscript_onnx: - inference_time: 905.0 - throughput: 1104.9723756906078 + inference_time: 900.0 + throughput: 1111.111111111111 estimated_peak_memory_range: - min: 393216 - max: 1864104 + min: 12288 + max: 9580632 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jp0zd3y65 + job_id: j5q6lxq7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:49:53Z' + timestamp: '2024-12-11T23:15:07Z' - torchscript_onnx_tflite: - inference_time: 590.0 - throughput: 1694.915254237288 + inference_time: 585.0 + throughput: 1709.4017094017095 estimated_peak_memory_range: - min: 12288 - max: 18572720 + min: 16384 + max: 14461600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j5q6zej4p + job_id: jgo2l9q4p job_status: Passed torchscript_onnx_qnn: inference_time: 725.0 throughput: 1379.3103448275863 estimated_peak_memory_range: min: 0 - max: 17018576 + max: 20888736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jg9lkjvlg + job_id: jp4ly20q5 job_status: Passed torchscript_onnx: - inference_time: 629.0 - throughput: 1589.825119236884 + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 405504 - max: 68013648 + min: 0 + max: 67830880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jp8q60oxp + job_id: jglvy9me5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:49:54Z' + timestamp: '2024-12-11T23:15:08Z' - torchscript_onnx_tflite: inference_time: 590.0 throughput: 1694.915254237288 estimated_peak_memory_range: - min: 0 - max: 12988464 + min: 12288 + max: 16131344 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jglvo6j85 + job_id: jpv6lnx75 job_status: Passed torchscript_onnx_qnn: - inference_time: 732.0 - throughput: 1366.120218579235 + inference_time: 759.0 + throughput: 1317.5230566534915 estimated_peak_memory_range: min: 0 - max: 14505104 + max: 13972368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jp147y02p + job_id: jpxklz2j5 job_status: Passed torchscript_onnx: - inference_time: 658.0 - throughput: 1519.756838905775 + inference_time: 543.0 + throughput: 1841.6206261510129 estimated_peak_memory_range: min: 0 - max: 25260416 + max: 26273728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgkeo7z2g + job_id: j56y894vp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:49:55Z' + timestamp: '2024-12-11T23:15:09Z' - torchscript_onnx_tflite: - inference_time: 872.0 - throughput: 1146.788990825688 + inference_time: 883.0 + throughput: 1132.5028312570782 estimated_peak_memory_range: - min: 28672 - max: 159991200 + min: 20480 + max: 41752920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j56yrek0p + job_id: jgjvr847g job_status: Passed torchscript_onnx_qnn: - inference_time: 1035.0 - throughput: 966.1835748792271 + inference_time: 1032.0 + throughput: 968.9922480620155 estimated_peak_memory_range: - min: 630784 - max: 1798392 + min: 638976 + max: 1824496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgdx8ewep + job_id: j5mn0lyyp job_status: 
Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:49:44Z' + timestamp: '2024-12-11T23:14:57Z' - torchscript_onnx_tflite: - inference_time: 12915.0 - throughput: 77.42934572202864 + inference_time: 12926.0 + throughput: 77.36345350456445 estimated_peak_memory_range: - min: 0 - max: 13182960 + min: 49152 + max: 16058272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp3jxvylg + job_id: jped7n375 job_status: Passed torchscript_onnx_qnn: - inference_time: 13295.0 - throughput: 75.21624670928921 + inference_time: 13322.0 + throughput: 75.06380423359856 estimated_peak_memory_range: - min: 614400 - max: 6498144 + min: 548864 + max: 10868480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jpxk3nv15 + job_id: jprvlm3vg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:49:46Z' + timestamp: '2024-12-11T23:15:00Z' - torchscript_onnx_tflite: - inference_time: 883.0 - throughput: 1132.5028312570782 + inference_time: 882.0 + throughput: 1133.7868480725624 estimated_peak_memory_range: - min: 16384 - max: 74603792 + min: 20480 + max: 51000744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgo2okjxp + job_id: jgz3l0kz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1038.0 - throughput: 963.3911368015414 + inference_time: 1035.0 + throughput: 966.1835748792271 estimated_peak_memory_range: min: 626688 - max: 2031000 + max: 1915440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 
0 total_layers: 104 - job_id: j5mnoqrwp + job_id: jp2krqyxp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:49:47Z' + timestamp: '2024-12-11T23:15:01Z' - torchscript_onnx_tflite: - inference_time: 1455.0 - throughput: 687.2852233676975 + inference_time: 1490.0 + throughput: 671.1409395973154 estimated_peak_memory_range: min: 16384 - max: 10834272 + max: 11635840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpv6e0jj5 + job_id: j5welrnz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1730.0 - throughput: 578.0346820809249 + inference_time: 1755.0 + throughput: 569.8005698005697 estimated_peak_memory_range: - min: 659456 - max: 6665632 + min: 0 + max: 6023248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jgn6ol2r5 + job_id: jpy1ok3rp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:49:48Z' + timestamp: '2024-12-11T23:15:02Z' - torchscript_onnx_tflite: - inference_time: 879.0 - throughput: 1137.6564277588168 + inference_time: 881.0 + throughput: 1135.0737797956867 estimated_peak_memory_range: - min: 16384 - max: 213187464 + min: 20480 + max: 52485096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgjvozjxg + job_id: jg9lzqeqg job_status: Passed torchscript_onnx_qnn: - inference_time: 1036.0 - throughput: 965.2509652509652 + inference_time: 1042.0 + throughput: 959.6928982725528 estimated_peak_memory_range: - min: 634880 - max: 1948928 + min: 638976 + max: 1794408 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jprvo8k9g + job_id: jp0zm8025 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:49:50Z' + timestamp: '2024-12-11T23:15:04Z' - torchscript_onnx_tflite: - inference_time: 1470.0 - throughput: 680.2721088435375 + inference_time: 1484.0 + throughput: 673.8544474393531 estimated_peak_memory_range: - min: 20480 - max: 13221136 + min: 16384 + max: 16571248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jped8ej15 + job_id: jp14nmxkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1872.0 - throughput: 534.1880341880342 + inference_time: 1898.0 + throughput: 526.8703898840885 estimated_peak_memory_range: - min: 626688 - max: 6187392 + min: 618496 + max: 6737952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jp2k4084p + job_id: jp8qedyzp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:49:51Z' + timestamp: '2024-12-11T23:15:05Z' - torchscript_onnx_tflite: - inference_time: 1053.0 - throughput: 949.667616334283 + inference_time: 1058.0 + throughput: 945.179584120983 estimated_peak_memory_range: min: 16384 - max: 16787232 + max: 24455360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgz38o1k5 + job_id: jgdxdmlkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1276.0 - throughput: 783.6990595611285 + inference_time: 1277.0 + throughput: 783.0853563038371 estimated_peak_memory_range: min: 618496 - max: 19128800 + 
max: 24356704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jpy1qre7p + job_id: jgke2wxyg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:49:52Z' + timestamp: '2024-12-11T23:15:06Z' - torchscript_onnx_qnn: - inference_time: 1210.0 - throughput: 826.4462809917355 + inference_time: 1192.0 + throughput: 838.9261744966443 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jp4lmkqv5 + job_id: jgn6zdvv5 job_status: Passed torchscript_onnx: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 967.0 + throughput: 1034.126163391934 estimated_peak_memory_range: - min: 7999488 - max: 7999488 + min: 9715712 + max: 9715712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: j5q6ze84p + job_id: jp3jzl0xg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:49:56Z' + timestamp: '2024-12-11T23:15:10Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml index 07012735..21632e0d 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml @@ -11,6 +11,7 @@ tags: - backbone - real-time - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1801.04381 research_paper_title: 'MobileNetV2: Inverted Residuals and Linear Bottlenecks' license: https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/LICENSE diff --git 
a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 4ad7e39d..1792c909 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: MobileNet-v2-Quantized universal_assets: - torchscript_onnx_tflite: mqexpdw5n - torchscript_onnx: mn0x9l49n + torchscript_onnx_tflite: mmd3y60kn + torchscript_onnx: mq9lpoe0q performance_metrics: - torchscript_onnx_tflite: - inference_time: 432.0 - throughput: 2314.814814814815 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: - min: 16384 - max: 121654280 + min: 12288 + max: 7332392 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jpv6ezzr5 + job_id: jgz3l90o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 641.0 - throughput: 1560.0624024960998 + inference_time: 634.0 + throughput: 1577.2870662460568 estimated_peak_memory_range: min: 16384 - max: 9900792 + max: 107157200 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgn6orrq5 + job_id: j5mn02l9p job_status: Passed torchscript_onnx: - inference_time: 117344.0 - throughput: 8.521952549768203 + inference_time: 116498.0 + throughput: 8.583838349156208 estimated_peak_memory_range: - min: 17805312 - max: 77840648 + min: 17821696 + max: 78238816 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 87 total_layers: 456 - job_id: jpv6ez4r5 + job_id: jgjvrm6eg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T04:00:24Z' + timestamp: '2024-12-12T01:29:21Z' - torchscript_onnx_tflite: 
- inference_time: 293.0 - throughput: 3412.9692832764504 + inference_time: 295.0 + throughput: 3389.830508474576 estimated_peak_memory_range: min: 12288 - max: 17666304 + max: 15029088 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jgjvokkeg + job_id: j5welvr35 job_status: Passed torchscript_onnx_qnn: - inference_time: 458.0 - throughput: 2183.406113537118 + inference_time: 463.0 + throughput: 2159.827213822894 estimated_peak_memory_range: - min: 0 - max: 18437648 + min: 12288 + max: 22165056 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,7 +134,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jprvo117g + job_id: jgn6zywq5 + job_status: Passed + torchscript_onnx: + inference_time: 99675.0 + throughput: 10.032605969400551 + estimated_peak_memory_range: + min: 41283584 + max: 1711663040 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 369 + layers_on_gpu: 0 + layers_on_cpu: 87 + total_layers: 456 + job_id: jped710v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -143,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T04:00:25Z' + timestamp: '2024-12-12T01:29:23Z' - torchscript_onnx_tflite: - inference_time: 289.0 - throughput: 3460.2076124567475 + inference_time: 290.0 + throughput: 3448.2758620689656 estimated_peak_memory_range: min: 8192 - max: 12627648 + max: 17715152 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jped844v5 + job_id: jg9lz1qwg job_status: Passed torchscript_onnx_qnn: - inference_time: 464.0 - throughput: 2155.1724137931033 + inference_time: 472.0 + throughput: 2118.64406779661 estimated_peak_memory_range: min: 159744 - max: 14488128 + max: 17980352 primary_compute_unit: NPU precision: int8 
layer_info: @@ -172,7 +187,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp2k433qp + job_id: jprvlq77g + job_status: Passed + torchscript_onnx: + inference_time: 101059.0 + throughput: 9.895209728970206 + estimated_peak_memory_range: + min: 22753280 + max: 1516985664 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 369 + layers_on_gpu: 0 + layers_on_cpu: 87 + total_layers: 456 + job_id: jgz3l9qx5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -181,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T04:00:27Z' + timestamp: '2024-12-12T01:29:25Z' - torchscript_onnx_tflite: - inference_time: 1062.0 - throughput: 941.6195856873823 + inference_time: 1061.0 + throughput: 942.5070688030161 estimated_peak_memory_range: min: 12288 - max: 16013392 + max: 19877248 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jgz38vvx5 + job_id: jp14nlm8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1481.0 - throughput: 675.219446320054 + inference_time: 1456.0 + throughput: 686.8131868131868 estimated_peak_memory_range: - min: 12288 - max: 8012848 + min: 32768 + max: 7379984 primary_compute_unit: NPU precision: int8 layer_info: @@ -210,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jpy1qvvlp + job_id: jp2kr6zqp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -219,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T04:00:07Z' + timestamp: '2024-12-12T01:29:04Z' - torchscript_onnx_tflite: - inference_time: 12351.0 - throughput: 80.96510404015869 + inference_time: 12359.0 + throughput: 80.91269520187717 estimated_peak_memory_range: - min: 245760 - max: 13087600 + min: 262144 + max: 7360784 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -233,7 +263,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 0 total_layers: 108 - job_id: j5we8mmm5 + job_id: jgdxd9mrp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -242,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:59:47Z' + timestamp: '2024-12-12T01:28:45Z' - torchscript_onnx_tflite: - inference_time: 426.0 - throughput: 2347.417840375587 + inference_time: 427.0 + throughput: 2341.92037470726 estimated_peak_memory_range: - min: 12288 - max: 6808040 + min: 20480 + max: 7000304 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jg9lk998g + job_id: j5welvrm5 job_status: Passed torchscript_onnx_qnn: inference_time: 589.0 throughput: 1697.792869269949 estimated_peak_memory_range: min: 188416 - max: 1483440 + max: 1494680 primary_compute_unit: NPU precision: int8 layer_info: @@ -271,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp0zdeen5 + job_id: jpy1owylp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -280,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T04:00:09Z' + timestamp: '2024-12-12T01:29:07Z' - torchscript_onnx_tflite: - inference_time: 1904.0 - throughput: 525.2100840336135 + inference_time: 1869.0 + throughput: 535.0454788657036 estimated_peak_memory_range: - min: 12288 - max: 14231744 + min: 16384 + max: 17341248 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jp147qq7p + job_id: jg9lz1q8g job_status: Passed torchscript_onnx_qnn: - inference_time: 2574.0 - throughput: 388.5003885003885 + inference_time: 2623.0 + throughput: 381.2428516965307 estimated_peak_memory_range: - min: 98304 - max: 5965056 + min: 94208 + max: 10613040 
primary_compute_unit: NPU precision: int8 layer_info: @@ -309,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgkeorrng + job_id: jp8qe9kop job_status: Passed reference_device_info: name: SA7255P ADP @@ -318,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T04:00:12Z' + timestamp: '2024-12-12T01:29:10Z' - torchscript_onnx_tflite: - inference_time: 430.0 - throughput: 2325.5813953488373 + inference_time: 428.0 + throughput: 2336.448598130841 estimated_peak_memory_range: - min: 12288 - max: 17170888 + min: 16384 + max: 55886840 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jgdx877zp + job_id: jp14nlm7p job_status: Passed torchscript_onnx_qnn: - inference_time: 586.0 - throughput: 1706.4846416382252 + inference_time: 591.0 + throughput: 1692.047377326565 estimated_peak_memory_range: - min: 172032 - max: 1801144 + min: 180224 + max: 1343928 primary_compute_unit: NPU precision: int8 layer_info: @@ -347,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j5q6z99op + job_id: jgke2nkng job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -356,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T04:00:14Z' + timestamp: '2024-12-12T01:29:12Z' - torchscript_onnx_tflite: - inference_time: 792.0 - throughput: 1262.6262626262626 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 12288 - max: 12012624 + min: 16384 + max: 12185024 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: j57ykvv95 + job_id: jgdxd9mzp job_status: Passed torchscript_onnx_qnn: - inference_time: 1704.0 - throughput: 586.8544600938967 + inference_time: 1071.0 + throughput: 933.7068160597572 
estimated_peak_memory_range: - min: 0 - max: 5813648 + min: 167936 + max: 6125536 primary_compute_unit: NPU precision: int8 layer_info: @@ -385,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jglvoe7m5 + job_id: jglvyz9m5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -394,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T04:00:16Z' + timestamp: '2024-12-12T01:29:14Z' - torchscript_onnx_tflite: - inference_time: 427.0 - throughput: 2341.92037470726 + inference_time: 428.0 + throughput: 2336.448598130841 estimated_peak_memory_range: min: 12288 - max: 6714976 + max: 111036000 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jp4lmjj15 + job_id: j57yew895 job_status: Passed torchscript_onnx_qnn: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 589.0 + throughput: 1697.792869269949 estimated_peak_memory_range: min: 180224 - max: 1384400 + max: 1500976 primary_compute_unit: NPU precision: int8 layer_info: @@ -423,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j56yrqvyp + job_id: j56y8j9yp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -432,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T04:00:18Z' + timestamp: '2024-12-12T01:29:16Z' - torchscript_onnx_tflite: - inference_time: 766.0 - throughput: 1305.4830287206266 + inference_time: 759.0 + throughput: 1317.5230566534915 estimated_peak_memory_range: min: 16384 - max: 12247504 + max: 17472064 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: jpxk3eel5 + job_id: jp4lyo215 job_status: Passed torchscript_onnx_qnn: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1150.0 
+ throughput: 869.5652173913044 estimated_peak_memory_range: - min: 167936 - max: 5910560 + min: 163840 + max: 6154752 primary_compute_unit: NPU precision: int8 layer_info: @@ -461,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp3jxq8ng + job_id: jp3jz3lng job_status: Passed reference_device_info: name: SA8775P ADP @@ -470,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T04:00:20Z' + timestamp: '2024-12-12T01:29:18Z' - torchscript_onnx_tflite: - inference_time: 480.0 - throughput: 2083.3333333333335 + inference_time: 470.0 + throughput: 2127.659574468085 estimated_peak_memory_range: - min: 16384 - max: 17856768 + min: 12288 + max: 24109488 primary_compute_unit: NPU precision: int8 layer_info: @@ -484,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 108 - job_id: j5mnovv9p + job_id: jpxkljzl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 690.0 - throughput: 1449.2753623188405 + inference_time: 689.0 + throughput: 1451.3788098693758 estimated_peak_memory_range: - min: 172032 - max: 20385536 + min: 143360 + max: 18266080 primary_compute_unit: NPU precision: int8 layer_info: @@ -499,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jgo2oemkp + job_id: jgo2l07kp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -508,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T04:00:22Z' + timestamp: '2024-12-12T01:29:20Z' - torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 719.0 + throughput: 1390.8205841446454 estimated_peak_memory_range: - min: 512000 - max: 512000 + min: 536576 + max: 536576 primary_compute_unit: NPU precision: int8 layer_info: @@ -522,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jp8q6wwop + job_id: jp0zmqxn5 job_status: Passed torchscript_onnx: - 
inference_time: 141372.0 - throughput: 7.073536485301191 + inference_time: 136247.0 + throughput: 7.339611147401411 estimated_peak_memory_range: - min: 62918656 - max: 62918656 + min: 62750720 + max: 62750720 primary_compute_unit: NPU precision: int8 layer_info: @@ -537,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 87 total_layers: 456 - job_id: jgz38vwx5 + job_id: j5welv0m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -546,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T04:00:29Z' + timestamp: '2024-12-12T01:29:27Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index adedbcc5..49b7d294 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MobileNet-v3-Large universal_assets: - torchscript_onnx_tflite: mnzvwx8oq - torchscript_onnx: mn1zy9drm + torchscript_onnx_tflite: mq36egvkq + torchscript_onnx: mq21426wm performance_metrics: - torchscript_onnx_tflite: - inference_time: 996.0 - throughput: 1004.0160642570281 + inference_time: 995.0 + throughput: 1005.0251256281407 estimated_peak_memory_range: min: 20480 - max: 5581360 + max: 67656928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgn6ol4r5 + job_id: jp2krz6xp job_status: Passed torchscript_onnx_qnn: - inference_time: 1035.0 - throughput: 966.1835748792271 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: min: 16384 - max: 56671728 + max: 106052904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jp3jxvdlg + job_id: jpv6lno75 job_status: Passed torchscript_onnx: - inference_time: 993.0 - throughput: 
1007.0493454179255 + inference_time: 1025.0 + throughput: 975.609756097561 estimated_peak_memory_range: - min: 626688 - max: 157846960 + min: 520192 + max: 2326936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jp4lmkzv5 + job_id: j5mn0l2yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:49:08Z' + timestamp: '2024-12-11T23:14:22Z' - torchscript_onnx_tflite: inference_time: 672.0 throughput: 1488.095238095238 estimated_peak_memory_range: - min: 20480 - max: 17066624 + min: 16384 + max: 23013056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jprvo8r9g + job_id: jpy1oywrp job_status: Passed torchscript_onnx_qnn: - inference_time: 721.0 - throughput: 1386.9625520110958 + inference_time: 716.0 + throughput: 1396.6480446927374 estimated_peak_memory_range: min: 0 - max: 20071600 + max: 21126416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jgo2okxxp + job_id: jgjvr8m7g job_status: Passed torchscript_onnx: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 733.0 + throughput: 1364.256480218281 estimated_peak_memory_range: min: 0 - max: 72228976 + max: 73479904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jpxk3nw15 + job_id: jgn6zw8v5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:49:09Z' + timestamp: '2024-12-11T23:14:23Z' - torchscript_onnx_tflite: - inference_time: 678.0 - 
throughput: 1474.9262536873157 + inference_time: 675.0 + throughput: 1481.4814814814815 estimated_peak_memory_range: - min: 16384 - max: 15294528 + min: 12288 + max: 19981408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp2k4014p + job_id: jp0zmxq25 job_status: Passed torchscript_onnx_qnn: inference_time: 717.0 throughput: 1394.700139470014 estimated_peak_memory_range: - min: 0 - max: 14471440 + min: 614400 + max: 19443984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jpv6e08j5 + job_id: jped7n175 job_status: Passed torchscript_onnx: - inference_time: 733.0 - throughput: 1364.256480218281 + inference_time: 730.0 + throughput: 1369.86301369863 estimated_peak_memory_range: min: 0 - max: 28565216 + max: 27502464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j5mnoqjwp + job_id: jprvl7jvg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:49:10Z' + timestamp: '2024-12-11T23:14:25Z' - torchscript_onnx_tflite: - inference_time: 991.0 - throughput: 1009.0817356205853 + inference_time: 995.0 + throughput: 1005.0251256281407 estimated_peak_memory_range: - min: 16384 - max: 77739056 + min: 20480 + max: 326660288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jpy1qrl7p + job_id: jp8qek9zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1002.0 - throughput: 998.003992015968 + inference_time: 993.0 + throughput: 1007.0493454179255 estimated_peak_memory_range: - min: 638976 - max: 2001432 + min: 643072 + max: 2237104 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jgjvoz9xg + job_id: jgz3l09z5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:48:59Z' + timestamp: '2024-12-11T23:14:13Z' - torchscript_onnx_tflite: - inference_time: 15166.0 - throughput: 65.93696426216538 + inference_time: 15180.0 + throughput: 65.87615283267458 estimated_peak_memory_range: - min: 49152 - max: 13969344 + min: 36864 + max: 15491744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp0zd3w65 + job_id: jgke2knyg job_status: Passed torchscript_onnx_qnn: - inference_time: 14806.0 - throughput: 67.5401864109145 + inference_time: 14846.0 + throughput: 67.35821096591674 estimated_peak_memory_range: - min: 589824 - max: 6232912 + min: 638976 + max: 10935216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jgz38o6k5 + job_id: jg9lzq1qg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:49:01Z' + timestamp: '2024-12-11T23:14:15Z' - torchscript_onnx_tflite: - inference_time: 993.0 - throughput: 1007.0493454179255 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: - min: 16384 - max: 78119080 + min: 0 + max: 77705304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp8q60nxp + job_id: j5q6ldk7p job_status: Passed torchscript_onnx_qnn: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 998.0 + throughput: 1002.0040080160321 
estimated_peak_memory_range: - min: 663552 - max: 1999096 + min: 634880 + max: 1868608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j5we82j65 + job_id: jp14nmlkp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:49:02Z' + timestamp: '2024-12-11T23:14:16Z' - torchscript_onnx_tflite: - inference_time: 1839.0 - throughput: 543.773790103317 + inference_time: 1864.0 + throughput: 536.480686695279 estimated_peak_memory_range: min: 16384 - max: 13713664 + max: 13093424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgkeo712g + job_id: jglvyqze5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1906.0 - throughput: 524.6589716684156 + inference_time: 1907.0 + throughput: 524.3838489774515 estimated_peak_memory_range: min: 0 - max: 5797664 + max: 6006368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jg9lkj6lg + job_id: jgdxdm9kp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:49:03Z' + timestamp: '2024-12-11T23:14:18Z' - torchscript_onnx_tflite: - inference_time: 993.0 - throughput: 1007.0493454179255 + inference_time: 991.0 + throughput: 1009.0817356205853 estimated_peak_memory_range: min: 20480 - max: 5613424 + max: 78093088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5q6zen4p + job_id: j56y80jvp job_status: Passed torchscript_onnx_qnn: - inference_time: 998.0 - throughput: 1002.0040080160321 + inference_time: 
1009.0 + throughput: 991.0802775024777 estimated_peak_memory_range: min: 634880 - max: 2222352 + max: 1793808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jp147yr2p + job_id: j57ye8wq5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:49:04Z' + timestamp: '2024-12-11T23:14:19Z' - torchscript_onnx_tflite: - inference_time: 1690.0 - throughput: 591.7159763313609 + inference_time: 1686.0 + throughput: 593.1198102016607 estimated_peak_memory_range: - min: 16384 - max: 15375248 + min: 20480 + max: 18630704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jglvo6d85 + job_id: jp3jzr3xg job_status: Passed torchscript_onnx_qnn: - inference_time: 1829.0 - throughput: 546.7468562055768 + inference_time: 1824.0 + throughput: 548.2456140350877 estimated_peak_memory_range: - min: 622592 - max: 6376960 + min: 618496 + max: 6807664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jgdx8ejep + job_id: jp4ly2oq5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:49:05Z' + timestamp: '2024-12-11T23:14:20Z' - torchscript_onnx_tflite: - inference_time: 1393.0 - throughput: 717.8750897343862 + inference_time: 1383.0 + throughput: 723.0657989877079 estimated_peak_memory_range: - min: 24576 - max: 19216448 + min: 16384 + max: 23951872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j56yrex0p + job_id: jgo2l904p job_status: Passed torchscript_onnx_qnn: - 
inference_time: 1454.0 - throughput: 687.757909215956 + inference_time: 1451.0 + throughput: 689.1798759476223 estimated_peak_memory_range: min: 618496 - max: 21742704 + max: 23619456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j57yk0ql5 + job_id: jpxklzjj5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:49:07Z' + timestamp: '2024-12-11T23:14:21Z' - torchscript_onnx_qnn: - inference_time: 1138.0 - throughput: 878.7346221441124 + inference_time: 1165.0 + throughput: 858.3690987124463 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jped8eq15 + job_id: j5welrvz5 job_status: Passed torchscript_onnx: - inference_time: 1048.0 - throughput: 954.1984732824427 + inference_time: 1044.0 + throughput: 957.8544061302682 estimated_peak_memory_range: - min: 13721600 - max: 13721600 + min: 15007744 + max: 15007744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jgn6oljr5 + job_id: jp2krznxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:49:11Z' + timestamp: '2024-12-11T23:14:26Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py index 6716f23c..44de0607 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + 
supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index 1ba94f4b..93248844 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -237,7 +237,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml index 819a755c..2ab768dd 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml @@ -12,6 +12,7 @@ tags: - quantized - backbone - real-time +imsdk_supported: true research_paper: https://arxiv.org/abs/1905.02244 research_paper_title: Searching for MobileNetV3 license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index 81312978..84d623be 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -50,15 +50,14 @@ aggregated: models: - name: MobileNet-v3-Large-Quantized universal_assets: - torchscript_onnx_tflite: mqpzyvegn - torchscript_onnx: mno30ekpn + torchscript_onnx_tflite: mq36eox3q performance_metrics: - torchscript_onnx_tflite: - inference_time: 343.0 - throughput: 2915.451895043732 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 16384 - max: 15535768 + min: 12288 + max: 15871648 primary_compute_unit: NPU precision: int8 
layer_info: @@ -66,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jglvoekj5 + job_id: jglvyzwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 633.0 - throughput: 1579.778830963665 + inference_time: 632.0 + throughput: 1582.2784810126582 estimated_peak_memory_range: min: 16384 - max: 14807520 + max: 14910040 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5we8mdm5 + job_id: j57yewnv5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:59:12Z' + timestamp: '2024-12-12T01:27:49Z' - torchscript_onnx_tflite: - inference_time: 245.0 - throughput: 4081.6326530612246 + inference_time: 251.0 + throughput: 3984.06374501992 estimated_peak_memory_range: - min: 24576 - max: 18724480 + min: 12288 + max: 22094000 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: j56yrq16p + job_id: j56y8jo6p job_status: Passed torchscript_onnx_qnn: - inference_time: 450.0 - throughput: 2222.222222222222 + inference_time: 456.0 + throughput: 2192.9824561403507 estimated_peak_memory_range: - min: 0 - max: 16394608 + min: 163840 + max: 14209408 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jg9lk938g + job_id: jp4lyo485 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:59:14Z' + timestamp: '2024-12-12T01:27:51Z' - torchscript_onnx_tflite: - inference_time: 210.0 - throughput: 4761.9047619047615 + inference_time: 250.0 + throughput: 4000.0 estimated_peak_memory_range: min: 12288 - max: 
12937072 + max: 13661024 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp3jxqm3g + job_id: jp3jz3o3g job_status: Passed torchscript_onnx_qnn: - inference_time: 470.0 - throughput: 2127.659574468085 + inference_time: 376.0 + throughput: 2659.574468085106 estimated_peak_memory_range: min: 0 - max: 13289408 + max: 18615456 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp147qd7p + job_id: jpxkljr35 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:59:15Z' + timestamp: '2024-12-12T01:27:53Z' - torchscript_onnx_tflite: - inference_time: 1105.0 - throughput: 904.9773755656108 + inference_time: 1184.0 + throughput: 844.5945945945946 estimated_peak_memory_range: min: 12288 - max: 16796064 + max: 21366192 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgo2oevqp + job_id: jgo2l0dqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1674.0 - throughput: 597.3715651135007 + inference_time: 1711.0 + throughput: 584.4535359438925 estimated_peak_memory_range: min: 163840 - max: 7490064 + max: 7323888 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgdx87rzp + job_id: j5mn02kdp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -204,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:58:56Z' + timestamp: '2024-12-12T01:27:55Z' - torchscript_onnx_tflite: - inference_time: 6766.0 - throughput: 147.79781259237365 + inference_time: 6724.0 + throughput: 148.720999405116 
estimated_peak_memory_range: - min: 49152 - max: 7018272 + min: 16384 + max: 2727488 primary_compute_unit: NPU precision: int8 layer_info: @@ -218,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jpv6ez1k5 + job_id: jpv6lomk5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -227,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:58:36Z' + timestamp: '2024-12-12T01:27:35Z' - torchscript_onnx_tflite: - inference_time: 341.0 - throughput: 2932.551319648094 + inference_time: 344.0 + throughput: 2906.9767441860463 estimated_peak_memory_range: - min: 12288 - max: 15180304 + min: 16384 + max: 27077264 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgjvok0vg + job_id: jgjvrmyvg job_status: Passed torchscript_onnx_qnn: inference_time: 582.0 throughput: 1718.213058419244 estimated_peak_memory_range: min: 184320 - max: 1859160 + max: 1746408 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j57ykvj95 + job_id: jgn6zyqk5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -265,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:58:57Z' + timestamp: '2024-12-12T01:27:56Z' - torchscript_onnx_tflite: - inference_time: 2613.0 - throughput: 382.70187523918867 + inference_time: 2567.0 + throughput: 389.5597974289053 estimated_peak_memory_range: - min: 0 - max: 15055040 + min: 16384 + max: 18356032 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jped84ro5 + job_id: jped71xo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3137.0 - throughput: 318.77590054191904 + inference_time: 3203.0 + throughput: 
312.2073056509522 estimated_peak_memory_range: - min: 102400 - max: 5770624 + min: 163840 + max: 10632480 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpxk3e7l5 + job_id: jp2kr6drp job_status: Passed reference_device_info: name: SA7255P ADP @@ -303,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:59:01Z' + timestamp: '2024-12-12T01:28:00Z' - torchscript_onnx_tflite: - inference_time: 354.0 - throughput: 2824.858757062147 + inference_time: 347.0 + throughput: 2881.844380403458 estimated_peak_memory_range: - min: 24576 - max: 25568680 + min: 16384 + max: 93188488 primary_compute_unit: NPU precision: int8 layer_info: @@ -317,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgz38vxo5 + job_id: jgz3l9yo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 579.0 - throughput: 1727.1157167530225 + inference_time: 583.0 + throughput: 1715.2658662092624 estimated_peak_memory_range: - min: 172032 - max: 1507832 + min: 40960 + max: 1546160 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j5mnovw9p + job_id: jpy1ow28p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -341,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:59:03Z' + timestamp: '2024-12-12T01:28:02Z' - torchscript_onnx_tflite: - inference_time: 785.0 - throughput: 1273.8853503184714 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: min: 12288 - max: 12742608 + max: 13505552 primary_compute_unit: NPU precision: int8 layer_info: @@ -355,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: j5we8md35 + job_id: j5welvz35 job_status: Passed torchscript_onnx_qnn: - inference_time: 1326.0 - 
throughput: 754.1478129713424 + inference_time: 1302.0 + throughput: 768.0491551459294 estimated_peak_memory_range: min: 0 - max: 6105760 + max: 5716352 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jgn6or9q5 + job_id: jp0zmq995 job_status: Passed reference_device_info: name: SA8295P ADP @@ -379,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:59:05Z' + timestamp: '2024-12-12T01:28:04Z' - torchscript_onnx_tflite: - inference_time: 338.0 - throughput: 2958.579881656805 + inference_time: 348.0 + throughput: 2873.5632183908046 estimated_peak_memory_range: - min: 20480 - max: 5932464 + min: 24576 + max: 15035792 primary_compute_unit: NPU precision: int8 layer_info: @@ -393,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jg9lk93wg + job_id: jg9lz12wg job_status: Passed torchscript_onnx_qnn: inference_time: 583.0 throughput: 1715.2658662092624 estimated_peak_memory_range: - min: 184320 - max: 1993520 + min: 180224 + max: 1544648 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jprvo147g + job_id: jp8qe9rkp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -417,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:59:07Z' + timestamp: '2024-12-12T01:28:06Z' - torchscript_onnx_tflite: - inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 670.0 + throughput: 1492.5373134328358 estimated_peak_memory_range: - min: 12288 - max: 13093776 + min: 16384 + max: 18847904 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jp147qd8p + job_id: jp14nl18p job_status: Passed torchscript_onnx_qnn: - inference_time: 1131.0 
- throughput: 884.1732979664014 + inference_time: 1133.0 + throughput: 882.61253309797 estimated_peak_memory_range: - min: 172032 - max: 6083616 + min: 163840 + max: 6097408 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp2k437qp + job_id: jgke2n0wg job_status: Passed reference_device_info: name: SA8775P ADP @@ -455,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:59:08Z' + timestamp: '2024-12-12T01:28:08Z' - torchscript_onnx_tflite: - inference_time: 436.0 - throughput: 2293.577981651376 + inference_time: 444.0 + throughput: 2252.252252252252 estimated_peak_memory_range: - min: 16384 - max: 18413168 + min: 12288 + max: 24916512 primary_compute_unit: NPU precision: int8 layer_info: @@ -469,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 137 - job_id: jgdx87rrp + job_id: jgdxd94rp job_status: Passed torchscript_onnx_qnn: - inference_time: 759.0 - throughput: 1317.5230566534915 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: min: 167936 - max: 18587392 + max: 20827280 primary_compute_unit: NPU precision: int8 layer_info: @@ -484,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jpy1qv4lp + job_id: j5q6lk1np job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -493,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:59:10Z' + timestamp: '2024-12-12T01:28:10Z' - torchscript_onnx_qnn: - inference_time: 714.0 - throughput: 1400.5602240896358 + inference_time: 1719.0 + throughput: 581.7335660267597 estimated_peak_memory_range: - min: 548864 - max: 548864 + min: 536576 + max: 536576 primary_compute_unit: NPU precision: int8 layer_info: @@ -507,22 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jp4lmjx15 - job_status: Passed 
- torchscript_onnx: - inference_time: 191803.0 - throughput: 5.213682789111745 - estimated_peak_memory_range: - min: 57819136 - max: 57819136 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 339 - layers_on_gpu: 0 - layers_on_cpu: 62 - total_layers: 401 - job_id: j5q6z97op + job_id: jprvlqd0g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -531,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:59:17Z' + timestamp: '2024-12-12T01:27:58Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 3fc10a77..c8477475 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: MobileNet-v3-Small universal_assets: - torchscript_onnx_tflite: mqv489yjq - torchscript_onnx: mmx71d8rq + torchscript_onnx_tflite: mqe7xk3vm + torchscript_onnx: mn4l19gzq performance_metrics: - torchscript_onnx_tflite: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 16384 - max: 44661768 + min: 28672 + max: 44835200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jpxk3n895 + job_id: jgdxdmjkp job_status: Passed torchscript_onnx_qnn: inference_time: 865.0 throughput: 1156.0693641618498 estimated_peak_memory_range: - min: 16384 - max: 34055296 + min: 2109440 + max: 5947408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jglvo69l5 + job_id: jp8qekozp job_status: Passed torchscript_onnx: - inference_time: 829.0 - throughput: 1206.2726176115802 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: 
- min: 520192 - max: 2051448 + min: 16384 + max: 7167008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgdx8eylp + job_id: j5welroz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:48:23Z' + timestamp: '2024-12-11T23:13:37Z' - torchscript_onnx_tflite: - inference_time: 536.0 - throughput: 1865.6716417910447 + inference_time: 537.0 + throughput: 1862.1973929236499 estimated_peak_memory_range: min: 16384 - max: 15603584 + max: 17518240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: j5mnoq1qp + job_id: j57ye8qq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 574.0 - throughput: 1742.1602787456445 + inference_time: 573.0 + throughput: 1745.2006980802792 estimated_peak_memory_range: - min: 618496 - max: 14284576 + min: 0 + max: 11095504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j56yre97p + job_id: jgke2kzyg job_status: Passed torchscript_onnx: - inference_time: 567.0 - throughput: 1763.668430335097 + inference_time: 577.0 + throughput: 1733.102253032929 estimated_peak_memory_range: min: 0 - max: 50761760 + max: 50589152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5we82k65 + job_id: jg9lzqvqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:48:24Z' + timestamp: '2024-12-11T23:13:38Z' - torchscript_onnx_tflite: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 
551.0 + throughput: 1814.8820326678765 estimated_peak_memory_range: min: 12288 - max: 11420160 + max: 11392928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jgn6oldm5 + job_id: jp4ly2zq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 583.0 - throughput: 1715.2658662092624 + inference_time: 611.0 + throughput: 1636.6612111292961 estimated_peak_memory_range: min: 0 - max: 11551344 + max: 10624704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp3jxvlzg + job_id: j5q6ld87p job_status: Passed torchscript_onnx: - inference_time: 595.0 - throughput: 1680.672268907563 + inference_time: 603.0 + throughput: 1658.374792703151 estimated_peak_memory_range: min: 0 - max: 23936352 + max: 23890656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jg9lkjrlg + job_id: jp14nm0kp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:48:25Z' + timestamp: '2024-12-11T23:13:39Z' - torchscript_onnx_tflite: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 815.0 + throughput: 1226.993865030675 estimated_peak_memory_range: min: 16384 - max: 5257792 + max: 4692248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jprvo8meg + job_id: jpxklzwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 838.0 - throughput: 1193.3174224343675 + inference_time: 834.0 + throughput: 1199.0407673860911 estimated_peak_memory_range: min: 634880 - max: 1973912 + max: 2134920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 
+234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2ok7dp + job_id: jglvyqne5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:48:13Z' + timestamp: '2024-12-11T23:13:28Z' - torchscript_onnx_tflite: - inference_time: 7794.0 - throughput: 128.30382345393892 + inference_time: 7744.0 + throughput: 129.13223140495867 estimated_peak_memory_range: - min: 32768 - max: 11273120 + min: 20480 + max: 15702224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jp2k40qmp + job_id: j5mn0ljyp job_status: Passed torchscript_onnx_qnn: - inference_time: 7675.0 - throughput: 130.29315960912052 + inference_time: 7627.0 + throughput: 131.1131506490101 estimated_peak_memory_range: - min: 602112 - max: 6217088 + min: 618496 + max: 11039312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgjvoz68g + job_id: jp3jzrkxg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:48:16Z' + timestamp: '2024-12-11T23:13:30Z' - torchscript_onnx_tflite: inference_time: 815.0 throughput: 1226.993865030675 estimated_peak_memory_range: min: 16384 - max: 119096880 + max: 179529368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jpy1qrk4p + job_id: jgn6zw2v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 843.0 + throughput: 1186.2396204033214 estimated_peak_memory_range: min: 634880 - max: 1954984 + max: 1825160 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jped8e005 + job_id: jgo2l9y4p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:48:17Z' + timestamp: '2024-12-11T23:13:32Z' - torchscript_onnx_tflite: - inference_time: 1532.0 - throughput: 652.7415143603133 + inference_time: 1547.0 + throughput: 646.4124111182934 estimated_peak_memory_range: - min: 20480 - max: 9724816 + min: 16384 + max: 10206752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jp0zd38e5 + job_id: jprvl7kvg job_status: Passed torchscript_onnx_qnn: - inference_time: 1564.0 - throughput: 639.386189258312 + inference_time: 1581.0 + throughput: 632.5110689437065 estimated_peak_memory_range: - min: 618496 - max: 6453136 + min: 0 + max: 6019904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgz38oq65 + job_id: jpv6ln375 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:48:18Z' + timestamp: '2024-12-11T23:13:33Z' - torchscript_onnx_tflite: - inference_time: 813.0 - throughput: 1230.0123001230013 + inference_time: 816.0 + throughput: 1225.4901960784314 estimated_peak_memory_range: - min: 28672 - max: 33978464 + min: 16384 + max: 172707952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jp8q60d8p + job_id: jp2krz8xp job_status: Passed torchscript_onnx_qnn: - inference_time: 844.0 - throughput: 1184.8341232227488 + inference_time: 841.0 + throughput: 1189.0606420927468 estimated_peak_memory_range: - min: 659456 - max: 2121952 + 
min: 651264 + max: 1877712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we820j5 + job_id: jgjvr8x7g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:48:19Z' + timestamp: '2024-12-11T23:13:34Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1358.0 + throughput: 736.3770250368188 estimated_peak_memory_range: min: 16384 - max: 12155648 + max: 15829712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: jgkeo7wog + job_id: jpy1oyerp job_status: Passed torchscript_onnx_qnn: - inference_time: 1539.0 - throughput: 649.772579597141 + inference_time: 1562.0 + throughput: 640.2048655569782 estimated_peak_memory_range: - min: 622592 - max: 6478864 + min: 618496 + max: 6749760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lkj7vg + job_id: jped7n975 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:48:20Z' + timestamp: '2024-12-11T23:13:35Z' - torchscript_onnx_tflite: - inference_time: 1097.0 - throughput: 911.5770282588878 + inference_time: 1088.0 + throughput: 919.1176470588235 estimated_peak_memory_range: min: 20480 - max: 18318784 + max: 22449056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 115 - job_id: j5q6zexmp + job_id: jp0zmxy25 job_status: Passed torchscript_onnx_qnn: - inference_time: 1155.0 - throughput: 865.8008658008658 + inference_time: 1158.0 + throughput: 863.5578583765113 
estimated_peak_memory_range: - min: 622592 - max: 18491872 + min: 618496 + max: 23231936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp147yklp + job_id: jgz3l0ez5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:48:22Z' + timestamp: '2024-12-11T23:13:36Z' - torchscript_onnx_qnn: - inference_time: 972.0 - throughput: 1028.80658436214 + inference_time: 982.0 + throughput: 1018.3299389002036 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv6e0ym5 + job_id: j56y806vp job_status: Passed torchscript_onnx: - inference_time: 5990.0 - throughput: 166.9449081803005 + inference_time: 870.0 + throughput: 1149.4252873563219 estimated_peak_memory_range: - min: 6344704 - max: 6344704 + min: 7823360 + max: 7823360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp147y92p + job_id: jgdxdmwkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:48:26Z' + timestamp: '2024-12-11T23:13:40Z' diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index cd8079c6..35dfbd37 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -31,7 +31,7 @@ export_without_hub_access, ) -ALL_COMPONENTS = ["CLIPTextEncoder", "CLIPImageEncoder"] +ALL_COMPONENTS = ["CLIPImageEncoder", "CLIPTextEncoder"] def export_model( @@ -123,10 +123,10 @@ def export_model( # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) components_dict: dict[str, BaseModel] = {} - if "CLIPTextEncoder" in components: - components_dict["CLIPTextEncoder"] = model.text_encoder # type: ignore if "CLIPImageEncoder" in components: components_dict["CLIPImageEncoder"] = model.image_encoder # type: ignore + if "CLIPTextEncoder" in components: + components_dict["CLIPTextEncoder"] = model.text_encoder # type: ignore compile_jobs: dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): @@ -238,7 +238,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) + parser = export_parser( + model_cls=Model, components=ALL_COMPONENTS, supports_onnx=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index 39517bc7..88e63aa3 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -42,55 +42,39 @@ aggregated: - SA8295P - SA8650P Proxy models: -- name: CLIPTextEncoder +- name: CLIPImageEncoder universal_assets: - torchscript_onnx_tflite: mq3e2dglm - torchscript_onnx: mqy3d48vm + torchscript_onnx_tflite: mqv64yk0m performance_metrics: - torchscript_onnx_tflite: - inference_time: 5678.0 - throughput: 176.11835153222967 + inference_time: 34591.0 + throughput: 28.909253852158077 estimated_peak_memory_range: - min: 16384 - max: 18323296 + min: 61440 + max: 59508792 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jp3jxvozg + layers_on_cpu: 0 + total_layers: 659 + job_id: jgo2l971p job_status: Passed torchscript_onnx_qnn: - inference_time: 4678.0 - throughput: 
213.76656690893543 + inference_time: 26472.0 + throughput: 37.77576307041402 estimated_peak_memory_range: - min: 12288 - max: 18707280 + min: 40960 + max: 57776376 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jgkeo70og - job_status: Passed - torchscript_onnx: - inference_time: 34783.0 - throughput: 28.749676566138632 - estimated_peak_memory_range: - min: 81920 - max: 1493623392 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 507 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 508 - job_id: jp0zd3xe5 + total_layers: 438 + job_id: j5q6ldnep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,51 +83,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:47:29Z' + timestamp: '2024-12-11T23:12:26Z' - torchscript_onnx_tflite: - inference_time: 3997.0 - throughput: 250.1876407305479 + inference_time: 27035.0 + throughput: 36.989088218975404 estimated_peak_memory_range: - min: 16384 - max: 87862336 + min: 45056 + max: 277078624 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jpv6e02m5 + layers_on_cpu: 0 + total_layers: 659 + job_id: jgjvr861g job_status: Passed torchscript_onnx_qnn: - inference_time: 3288.0 - throughput: 304.1362530413625 + inference_time: 20808.0 + throughput: 48.05843906189927 estimated_peak_memory_range: - min: 12288 - max: 73120336 + min: 638976 + max: 178428272 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jglvo68l5 - job_status: Passed - torchscript_onnx: - inference_time: 24961.0 - throughput: 40.062497496093904 - estimated_peak_memory_range: - min: 118784 - max: 556851536 - primary_compute_unit: NPU - 
precision: fp16 - layer_info: - layers_on_npu: 507 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 508 - job_id: jgkeo7kog + total_layers: 438 + job_id: j56y80xnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,51 +121,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:47:31Z' + timestamp: '2024-12-11T23:12:28Z' - torchscript_onnx_tflite: - inference_time: 3961.0 - throughput: 252.46149962130775 + inference_time: 24249.0 + throughput: 41.23881397171017 estimated_peak_memory_range: - min: 12288 - max: 86049952 + min: 32768 + max: 278407232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jped8e605 + layers_on_cpu: 0 + total_layers: 659 + job_id: jgz3l0q45 job_status: Passed torchscript_onnx_qnn: - inference_time: 3270.0 - throughput: 305.8103975535168 + inference_time: 18669.0 + throughput: 53.564732979806095 estimated_peak_memory_range: - min: 8192 - max: 69872480 + min: 0 + max: 179703408 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jp3jxv7zg - job_status: Passed - torchscript_onnx: - inference_time: 20656.0 - throughput: 48.412083656080554 - estimated_peak_memory_range: - min: 24576 - max: 331043232 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 507 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 508 - job_id: jglvo6ql5 + total_layers: 438 + job_id: jgo2l9x1p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,36 +159,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:47:33Z' + timestamp: '2024-12-11T23:12:30Z' - torchscript_onnx_tflite: - inference_time: 5589.0 - throughput: 178.9228842368939 + inference_time: 33984.0 + 
throughput: 29.425612052730695 estimated_peak_memory_range: - min: 20480 - max: 18487136 + min: 61440 + max: 57776584 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: j5we82yj5 + layers_on_cpu: 0 + total_layers: 659 + job_id: jg9lzq7mg job_status: Passed torchscript_onnx_qnn: - inference_time: 4685.0 - throughput: 213.4471718249733 + inference_time: 19984.0 + throughput: 50.0400320256205 estimated_peak_memory_range: - min: 28672 - max: 1359632 + min: 675840 + max: 1899152 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jpv6e0mm5 + total_layers: 438 + job_id: jgjvr891g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,36 +197,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:47:12Z' + timestamp: '2024-12-11T23:12:32Z' - torchscript_onnx_tflite: - inference_time: 61394.0 - throughput: 16.28823663550184 + inference_time: 327040.0 + throughput: 3.0577299412915853 estimated_peak_memory_range: - min: 90112 - max: 84428304 + min: 139264 + max: 276875376 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jp147yolp + layers_on_cpu: 0 + total_layers: 659 + job_id: jgdxdmy6p job_status: Passed torchscript_onnx_qnn: - inference_time: 51693.0 - throughput: 19.344979010697774 + inference_time: 265550.0 + throughput: 3.7657691583505932 estimated_peak_memory_range: - min: 163840 - max: 5808816 + min: 659456 + max: 11702384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: j5we82zj5 + total_layers: 438 + job_id: jg9lzqrmg job_status: Passed reference_device_info: name: 
SA7255P ADP @@ -281,36 +235,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:47:16Z' + timestamp: '2024-12-11T23:12:36Z' - torchscript_onnx_tflite: - inference_time: 5678.0 - throughput: 176.11835153222967 + inference_time: 34335.0 + throughput: 29.1247997670016 estimated_peak_memory_range: - min: 20480 - max: 18789432 + min: 28672 + max: 57031328 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: j57yk0nr5 + layers_on_cpu: 0 + total_layers: 659 + job_id: jp4ly2625 job_status: Passed torchscript_onnx_qnn: - inference_time: 4766.0 - throughput: 209.819555182543 + inference_time: 20528.0 + throughput: 48.71395167575994 estimated_peak_memory_range: - min: 20480 - max: 1342312 + min: 667648 + max: 2019936 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jp147y1lp + total_layers: 438 + job_id: jgdxdmk6p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,36 +273,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:47:18Z' + timestamp: '2024-12-11T23:12:38Z' - torchscript_onnx_tflite: - inference_time: 7639.0 - throughput: 130.90718680455558 + inference_time: 40114.0 + throughput: 24.928952485416563 estimated_peak_memory_range: - min: 28672 - max: 70049920 + min: 36864 + max: 210130656 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jpxk3nr95 + layers_on_cpu: 0 + total_layers: 659 + job_id: j5mn0l17p job_status: Passed torchscript_onnx_qnn: - inference_time: 6535.0 - throughput: 153.0221882172915 + inference_time: 30939.0 + throughput: 32.32166521219173 estimated_peak_memory_range: - min: 61440 - max: 5952576 + min: 696320 
+ max: 6829424 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: j57yk08r5 + total_layers: 438 + job_id: jpxklzq85 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,36 +311,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:47:20Z' + timestamp: '2024-12-11T23:12:40Z' - torchscript_onnx_tflite: - inference_time: 5695.0 - throughput: 175.5926251097454 + inference_time: 34062.0 + throughput: 29.358229111619988 estimated_peak_memory_range: - min: 20480 - max: 18495512 + min: 65536 + max: 61283688 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jgn6olqm5 + layers_on_cpu: 0 + total_layers: 659 + job_id: jprvl7mkg job_status: Passed torchscript_onnx_qnn: - inference_time: 4767.0 - throughput: 209.77554017201595 + inference_time: 20836.0 + throughput: 47.99385678633135 estimated_peak_memory_range: - min: 49152 - max: 1276344 + min: 663552 + max: 2262800 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jpxk3nz95 + total_layers: 438 + job_id: jgn6zw4j5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,36 +349,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:47:22Z' + timestamp: '2024-12-11T23:12:42Z' - torchscript_onnx_tflite: - inference_time: 8155.0 - throughput: 122.6241569589209 + inference_time: 42508.0 + throughput: 23.524983532511527 estimated_peak_memory_range: - min: 16384 - max: 84043936 + min: 81920 + max: 277010608 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jp2k40dmp + 
layers_on_cpu: 0 + total_layers: 659 + job_id: jpy1oyk0p job_status: Passed torchscript_onnx_qnn: - inference_time: 6942.0 - throughput: 144.05070584845865 + inference_time: 29748.0 + throughput: 33.6157052574963 estimated_peak_memory_range: - min: 12288 - max: 5728720 + min: 659456 + max: 11454960 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jgn6olwm5 + total_layers: 438 + job_id: jp2krz16p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,36 +387,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:47:25Z' + timestamp: '2024-12-11T23:12:44Z' - torchscript_onnx_tflite: - inference_time: 6336.0 - throughput: 157.82828282828282 + inference_time: 34902.0 + throughput: 28.651653200389664 estimated_peak_memory_range: - min: 16384 - max: 72386880 + min: 77824 + max: 210684928 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 658 + layers_on_npu: 659 layers_on_gpu: 0 - layers_on_cpu: 2 - total_layers: 660 - job_id: jp0zd39e5 + layers_on_cpu: 0 + total_layers: 659 + job_id: jp8qeknqp job_status: Passed torchscript_onnx_qnn: - inference_time: 5217.0 - throughput: 191.68104274487254 + inference_time: 28971.0 + throughput: 34.51727589658624 estimated_peak_memory_range: - min: 24576 - max: 72239840 + min: 0 + max: 176875472 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jp2k40zmp + total_layers: 438 + job_id: jp0zmxn05 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,36 +425,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:47:27Z' + timestamp: '2024-12-11T23:12:46Z' - torchscript_onnx_qnn: - inference_time: 5156.0 - throughput: 193.9487975174554 + inference_time: 22167.0 + throughput: 
45.11210357738982 estimated_peak_memory_range: - min: 184320 - max: 184320 + min: 602112 + max: 602112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 445 + layers_on_npu: 438 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 445 - job_id: jped8ex05 - job_status: Passed - torchscript_onnx: - inference_time: 38009.0 - throughput: 26.309558262516774 - estimated_peak_memory_range: - min: 132530176 - max: 132530176 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 507 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 508 - job_id: jp3jxvrzg + total_layers: 438 + job_id: jgz3l0645 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,56 +448,40 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:47:35Z' -- name: CLIPImageEncoder + timestamp: '2024-12-11T23:12:34Z' +- name: CLIPTextEncoder universal_assets: - torchscript_onnx_tflite: mq3e2d83m - torchscript_onnx: mnwezprrn + torchscript_onnx_tflite: mqe7x1xkm performance_metrics: - torchscript_onnx_tflite: - inference_time: 34201.0 - throughput: 29.238911142949036 + inference_time: 5809.0 + throughput: 172.14666896195558 estimated_peak_memory_range: - min: 40960 - max: 53874968 + min: 16384 + max: 25497880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jgo2okddp + layers_on_cpu: 2 + total_layers: 660 + job_id: jpv6lnyz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 26406.0 - throughput: 37.87018101946527 + inference_time: 4636.0 + throughput: 215.70319240724763 estimated_peak_memory_range: - min: 61440 - max: 56426024 + min: 16384 + max: 19228216 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: j5q6ze1mp - job_status: Passed - torchscript_onnx: - 
inference_time: 158287.0 - throughput: 6.31763821413003 - estimated_peak_memory_range: - min: 69632 - max: 203042520 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 501 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 502 - job_id: jp8q60k8p + total_layers: 445 + job_id: jglvyqd25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -567,36 +490,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:47:29Z' + timestamp: '2024-12-11T23:12:27Z' - torchscript_onnx_tflite: - inference_time: 26701.0 - throughput: 37.45178083217857 + inference_time: 3991.0 + throughput: 250.56376847907794 estimated_peak_memory_range: - min: 45056 - max: 274313376 + min: 16384 + max: 87081760 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jgjvoz38g + layers_on_cpu: 2 + total_layers: 660 + job_id: jped7n085 job_status: Passed torchscript_onnx_qnn: - inference_time: 20878.0 - throughput: 47.89730817128078 + inference_time: 3281.0 + throughput: 304.7851264858275 estimated_peak_memory_range: - min: 51761152 - max: 231103904 + min: 12288 + max: 71178624 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: j56yrem7p + total_layers: 445 + job_id: jp3jzrdmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -605,51 +528,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:47:31Z' + timestamp: '2024-12-11T23:12:29Z' - torchscript_onnx_tflite: - inference_time: 21369.0 - throughput: 46.79676166409285 + inference_time: 3351.0 + throughput: 298.41838257236645 estimated_peak_memory_range: - min: 28672 - max: 277948464 + min: 12288 + max: 87352336 primary_compute_unit: NPU precision: fp16 layer_info: - 
layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jgz38oz65 + layers_on_cpu: 2 + total_layers: 660 + job_id: j5welr045 job_status: Passed torchscript_onnx_qnn: - inference_time: 18876.0 - throughput: 52.977325704598435 + inference_time: 3197.0 + throughput: 312.7932436659368 estimated_peak_memory_range: - min: 0 - max: 180237872 + min: 8192 + max: 71637328 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jgo2okwdp - job_status: Passed - torchscript_onnx: - inference_time: 95268.0 - throughput: 10.496704034933032 - estimated_peak_memory_range: - min: 643072 - max: 2850188224 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 501 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 502 - job_id: j56yre07p + total_layers: 445 + job_id: jpv6ln8z5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -658,36 +566,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:47:33Z' + timestamp: '2024-12-11T23:12:31Z' - torchscript_onnx_tflite: - inference_time: 33570.0 - throughput: 29.788501638367592 + inference_time: 5613.0 + throughput: 178.15784785319792 estimated_peak_memory_range: - min: 0 - max: 54049032 + min: 40960 + max: 24415064 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jg9lkjovg + layers_on_cpu: 2 + total_layers: 660 + job_id: jp14nmknp job_status: Passed torchscript_onnx_qnn: - inference_time: 20122.0 - throughput: 49.696849219759464 + inference_time: 4743.0 + throughput: 210.8370229812355 estimated_peak_memory_range: - min: 692224 - max: 1802312 + min: 32768 + max: 1271232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jgjvozy8g + total_layers: 445 + job_id: jped7nq85 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -696,36 +604,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:47:13Z' + timestamp: '2024-12-11T23:12:32Z' - torchscript_onnx_tflite: - inference_time: 326507.0 - throughput: 3.0627214730465195 + inference_time: 61341.0 + throughput: 16.30231003733229 estimated_peak_memory_range: - min: 139264 - max: 276595680 + min: 81920 + max: 85829232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jgdx8e6lp + layers_on_cpu: 2 + total_layers: 660 + job_id: j57ye81n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 265126.0 - throughput: 3.7717915255388004 + inference_time: 51576.0 + throughput: 19.388863037071506 estimated_peak_memory_range: - min: 729088 - max: 7359296 + min: 73728 + max: 11442528 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jg9lkj2vg + total_layers: 445 + job_id: jp14nm9np job_status: Passed reference_device_info: name: SA7255P ADP @@ -734,36 +642,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:47:17Z' + timestamp: '2024-12-11T23:12:36Z' - torchscript_onnx_tflite: - inference_time: 34116.0 - throughput: 29.31175987806308 + inference_time: 5729.0 + throughput: 174.55053237912375 estimated_peak_memory_range: - min: 20480 - max: 58840128 + min: 16384 + max: 23635544 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jp4lmk4l5 + layers_on_cpu: 2 + total_layers: 660 + job_id: jpxklz885 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 20571.0 - throughput: 48.61212386369161 + inference_time: 4772.0 + throughput: 209.55574182732607 estimated_peak_memory_range: - min: 675840 - max: 2205504 + min: 28672 + max: 1224712 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jgdx8e4lp + total_layers: 445 + job_id: j57ye8mn5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -772,36 +680,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:47:19Z' + timestamp: '2024-12-11T23:12:38Z' - torchscript_onnx_tflite: - inference_time: 42408.0 - throughput: 23.580456517638183 + inference_time: 7632.0 + throughput: 131.0272536687631 estimated_peak_memory_range: - min: 69632 - max: 376753424 + min: 16384 + max: 70967744 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jp14dro2p + layers_on_cpu: 2 + total_layers: 660 + job_id: jgn6zwdj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 30852.0 - throughput: 32.41280954233113 + inference_time: 6530.0 + throughput: 153.1393568147014 estimated_peak_memory_range: - min: 647168 - max: 6352656 + min: 49152 + max: 6483632 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jp4lmk2l5 + total_layers: 445 + job_id: j5mn0l77p job_status: Passed reference_device_info: name: SA8295P ADP @@ -810,36 +718,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:47:21Z' + timestamp: '2024-12-11T23:12:40Z' - torchscript_onnx_tflite: - inference_time: 34117.0 - throughput: 29.31090072397925 + inference_time: 5678.0 + throughput: 176.11835153222967 estimated_peak_memory_range: - min: 0 - max: 59574328 + min: 20480 + max: 19869832 
primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jprvo8deg + layers_on_cpu: 2 + total_layers: 660 + job_id: jp2krzq6p job_status: Passed torchscript_onnx_qnn: - inference_time: 20427.0 - throughput: 48.95481470602634 + inference_time: 4872.0 + throughput: 205.2545155993432 estimated_peak_memory_range: - min: 663552 - max: 1869584 + min: 16384 + max: 1232016 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: j5mnoqlqp + total_layers: 445 + job_id: jprvl7rkg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -848,21 +756,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:47:23Z' - - torchscript_onnx_qnn: - inference_time: 29742.0 - throughput: 33.622486719117745 + timestamp: '2024-12-11T23:12:42Z' + - torchscript_onnx_tflite: + inference_time: 8137.0 + throughput: 122.89541600098316 estimated_peak_memory_range: - min: 0 - max: 5716016 + min: 16384 + max: 85180672 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 658 + layers_on_gpu: 0 + layers_on_cpu: 2 + total_layers: 660 + job_id: jp0zmxw05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6947.0 + throughput: 143.94702749388225 + estimated_peak_memory_range: + min: 81920 + max: 6276736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jprvo87eg + total_layers: 445 + job_id: jpy1oyl0p job_status: Passed reference_device_info: name: SA8775P ADP @@ -871,36 +794,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:47:25Z' + timestamp: '2024-12-11T23:12:44Z' - torchscript_onnx_tflite: - inference_time: 34821.0 - throughput: 
28.718302173975474 + inference_time: 6349.0 + throughput: 157.50511891636478 estimated_peak_memory_range: min: 16384 - max: 212375408 + max: 77214608 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 659 + layers_on_npu: 658 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 659 - job_id: jp8q60r8p + layers_on_cpu: 2 + total_layers: 660 + job_id: jgke2k1vg job_status: Passed torchscript_onnx_qnn: - inference_time: 29464.0 - throughput: 33.9397230518599 + inference_time: 5399.0 + throughput: 185.21948508983144 estimated_peak_memory_range: - min: 0 - max: 177694960 + min: 12288 + max: 74340544 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jpy1qry4p + total_layers: 445 + job_id: jp8qeklqp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -909,36 +832,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:47:27Z' + timestamp: '2024-12-11T23:12:46Z' - torchscript_onnx_qnn: - inference_time: 22200.0 - throughput: 45.04504504504504 + inference_time: 5080.0 + throughput: 196.8503937007874 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 184320 + max: 184320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 438 + layers_on_npu: 445 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 438 - job_id: jgz38oy65 - job_status: Passed - torchscript_onnx: - inference_time: 160456.0 - throughput: 6.232238121354141 - estimated_peak_memory_range: - min: 196833280 - max: 196833280 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 501 - layers_on_gpu: 0 - layers_on_cpu: 1 - total_layers: 502 - job_id: jgo2ok9dp + total_layers: 445 + job_id: j5welrk45 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -947,4 +855,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: 
'2024-11-26T01:47:35Z' + timestamp: '2024-12-11T23:12:34Z' diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index 825268c4..f8b813ec 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: OpenPose universal_assets: - torchscript_onnx_tflite: mqv489g8q - torchscript_onnx: mqy3d475m + torchscript_onnx_tflite: mqp3z9rlm + torchscript_onnx: mmxe72zkn performance_metrics: - torchscript_onnx_tflite: - inference_time: 11686.0 - throughput: 85.57247989046722 + inference_time: 11792.0 + throughput: 84.8032564450475 estimated_peak_memory_range: - min: 20480 - max: 383005848 + min: 16384 + max: 382571920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jglvo6ye5 + job_id: jp0zmxx05 job_status: Passed torchscript_onnx_qnn: - inference_time: 11858.0 - throughput: 84.33125316242199 + inference_time: 11807.0 + throughput: 84.69551960701278 estimated_peak_memory_range: min: 618496 - max: 225278280 + max: 225583248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp147ynkp + job_id: jped7nn85 job_status: Passed torchscript_onnx: - inference_time: 11987.0 - throughput: 83.42370901810294 + inference_time: 12038.0 + throughput: 83.07027745472669 estimated_peak_memory_range: - min: 53248 - max: 118933960 + min: 1097728 + max: 3817560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jprvo8xeg + job_id: jprvl77kg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:46:03Z' + timestamp: '2024-12-11T23:11:23Z' - torchscript_onnx_tflite: 
- inference_time: 8708.0 - throughput: 114.83693155718879 + inference_time: 8705.0 + throughput: 114.87650775416428 estimated_peak_memory_range: min: 16384 - max: 17939456 + max: 20241856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j56yre8vp + job_id: jp8qekkqp job_status: Passed torchscript_onnx_qnn: - inference_time: 8762.0 - throughput: 114.1291942478886 + inference_time: 8758.0 + throughput: 114.18131993605846 estimated_peak_memory_range: min: 618496 - max: 19472672 + max: 20131504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgdx8edkp + job_id: jgz3l0045 job_status: Passed torchscript_onnx: - inference_time: 8818.0 - throughput: 113.40440009072351 + inference_time: 8796.0 + throughput: 113.68804001819008 estimated_peak_memory_range: - min: 491520 - max: 50470128 + min: 1155072 + max: 53335872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp2k40omp + job_id: jp2krzz6p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:46:04Z' + timestamp: '2024-12-11T23:11:25Z' - torchscript_onnx_tflite: inference_time: 8660.0 throughput: 115.47344110854503 estimated_peak_memory_range: min: 12288 - max: 15211088 + max: 18436176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jp3jxvzxg + job_id: jgke2kkvg job_status: Passed torchscript_onnx_qnn: - inference_time: 7143.0 - throughput: 139.9972000559989 + inference_time: 8719.0 + throughput: 114.69205184080744 estimated_peak_memory_range: min: 614400 - max: 15330320 + max: 18296752 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5we82lj5 + job_id: j5welrr45 job_status: Passed torchscript_onnx: - inference_time: 8740.0 - throughput: 114.41647597254004 + inference_time: 8766.0 + throughput: 114.07711613050422 estimated_peak_memory_range: - min: 0 - max: 30498400 + min: 1146880 + max: 33200352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jpy1qr84p + job_id: jpy1oyy0p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:46:05Z' + timestamp: '2024-12-11T23:11:26Z' - torchscript_onnx_tflite: - inference_time: 11689.0 - throughput: 85.55051758063136 + inference_time: 11710.0 + throughput: 85.39709649871904 estimated_peak_memory_range: - min: 192512 - max: 382956368 + min: 200704 + max: 393912984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgo2okl4p + job_id: j5q6lddep job_status: Passed torchscript_onnx_qnn: - inference_time: 11722.0 - throughput: 85.30967411704488 + inference_time: 11656.0 + throughput: 85.79272477693891 estimated_peak_memory_range: - min: 651264 - max: 1889856 + min: 679936 + max: 2019704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jg9lkjzvg + job_id: jg9lzqqmg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:45:54Z' + timestamp: '2024-12-11T23:11:14Z' - torchscript_onnx_tflite: - inference_time: 770133.0 - throughput: 1.298477016307573 + inference_time: 770143.0 
+ throughput: 1.29846015610088 estimated_peak_memory_range: - min: 475136 - max: 15206320 + min: 8192 + max: 16647536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jpv6e0l75 + job_id: jglvyqq25 job_status: Passed torchscript_onnx_qnn: - inference_time: 770221.0 - throughput: 1.298328661514033 + inference_time: 770181.0 + throughput: 1.2983960913084067 estimated_peak_memory_range: - min: 856064 - max: 7577536 + min: 827392 + max: 11094704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgdx8edlp + job_id: jgdxdmm6p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:45:56Z' + timestamp: '2024-12-11T23:11:17Z' - torchscript_onnx_tflite: - inference_time: 11776.0 - throughput: 84.91847826086956 + inference_time: 11749.0 + throughput: 85.11362669163333 estimated_peak_memory_range: min: 196608 - max: 383021536 + max: 393372480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgjvozr7g + job_id: j56y800np job_status: Passed torchscript_onnx_qnn: - inference_time: 11663.0 - throughput: 85.74123295892996 + inference_time: 11792.0 + throughput: 84.8032564450475 estimated_peak_memory_range: - min: 659456 - max: 1991912 + min: 667648 + max: 2051288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j57yk0or5 + job_id: j57ye88n5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:45:57Z' + timestamp: '2024-12-11T23:11:18Z' - 
torchscript_onnx_tflite: - inference_time: 26620.0 - throughput: 37.56574004507889 + inference_time: 26610.0 + throughput: 37.579857196542655 estimated_peak_memory_range: min: 32768 - max: 14515488 + max: 16633248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jped8e775 + job_id: jp3jzrrmg job_status: Passed torchscript_onnx_qnn: - inference_time: 25861.0 - throughput: 38.66826495495147 + inference_time: 26496.0 + throughput: 37.74154589371981 estimated_peak_memory_range: - min: 704512 - max: 6604480 + min: 3055616 + max: 9219008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp4lmkel5 + job_id: jp4ly2225 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:45:59Z' + timestamp: '2024-12-11T23:11:19Z' - torchscript_onnx_tflite: - inference_time: 11734.0 - throughput: 85.22243054371911 + inference_time: 11798.0 + throughput: 84.76012883539583 estimated_peak_memory_range: - min: 200704 - max: 393351264 + min: 221184 + max: 383015328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jgz38olz5 + job_id: jgo2l991p job_status: Passed torchscript_onnx_qnn: - inference_time: 11650.0 - throughput: 85.83690987124463 + inference_time: 11649.0 + throughput: 85.84427847883939 estimated_peak_memory_range: - min: 692224 - max: 2375160 + min: 671744 + max: 2325552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpxk3n095 + job_id: jpxklzz85 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
SA8650P Proxy - timestamp: '2024-11-26T01:46:00Z' + timestamp: '2024-12-11T23:11:20Z' - torchscript_onnx_tflite: - inference_time: 29305.0 - throughput: 34.12386964681795 + inference_time: 29306.0 + throughput: 34.122705248072066 estimated_peak_memory_range: min: 16384 - max: 14497968 + max: 16937936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j5we82lz5 + job_id: jpv6lnnz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 29336.0 - throughput: 34.08781019907281 + inference_time: 29313.0 + throughput: 34.11455668133593 estimated_peak_memory_range: - min: 663552 - max: 6454816 + min: 708608 + max: 6722816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5mnoq9qp + job_id: j5mn0ll7p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:46:01Z' + timestamp: '2024-12-11T23:11:21Z' - torchscript_onnx_tflite: - inference_time: 23494.0 - throughput: 42.56405890865753 + inference_time: 23508.0 + throughput: 42.53871022630594 estimated_peak_memory_range: - min: 110592 - max: 19300240 + min: 69632 + max: 22401744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jg9lkjzqg + job_id: jgjvr881g job_status: Passed torchscript_onnx_qnn: - inference_time: 23477.0 - throughput: 42.594880095412535 + inference_time: 23670.0 + throughput: 42.24757076468103 estimated_peak_memory_range: - min: 638976 - max: 21712064 + min: 647168 + max: 23617760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgn6ol1m5 + job_id: jgn6zwwj5 job_status: Passed reference_device_info: name: QCS8450 
(Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:46:02Z' + timestamp: '2024-12-11T23:11:22Z' - torchscript_onnx_qnn: - inference_time: 12265.0 - throughput: 81.53281695882593 + inference_time: 12287.0 + throughput: 81.3868316106454 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp147ynlp + job_id: jp14nmmnp job_status: Passed torchscript_onnx: - inference_time: 12768.0 - throughput: 78.32080200501254 + inference_time: 12736.0 + throughput: 78.51758793969849 estimated_peak_memory_range: - min: 106549248 - max: 106549248 + min: 107786240 + max: 107786240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp0zd3oe5 + job_id: jp8qekdqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:46:06Z' + timestamp: '2024-12-11T23:11:27Z' diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index f6fd01ec..6baa5ea1 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Posenet-Mobilenet universal_assets: - torchscript_onnx_tflite: mnlvk1dem - torchscript_onnx: mqy3d465m + torchscript_onnx_tflite: mqyv3rw7q + torchscript_onnx: mq8dk51gm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1390.0 - throughput: 719.4244604316547 + inference_time: 1391.0 + throughput: 718.9072609633357 estimated_peak_memory_range: min: 16384 - max: 157596632 + max: 186036976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - 
job_id: jprvo80vg + job_id: jprvl7x7g job_status: Passed torchscript_onnx_qnn: - inference_time: 1461.0 - throughput: 684.4626967830253 + inference_time: 1468.0 + throughput: 681.1989100817439 estimated_peak_memory_range: - min: 12288 - max: 12777496 + min: 36864 + max: 68144160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jpv6e0d75 + job_id: jgo2l9wkp job_status: Passed torchscript_onnx: - inference_time: 1899.0 - throughput: 526.592943654555 + inference_time: 1901.0 + throughput: 526.0389268805892 estimated_peak_memory_range: - min: 16384 - max: 8076424 + min: 20480 + max: 131999136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j5mnoqmyp + job_id: jp14nm1np job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:45:13Z' + timestamp: '2024-12-11T23:10:34Z' - torchscript_onnx_tflite: - inference_time: 962.0 - throughput: 1039.5010395010395 + inference_time: 961.0 + throughput: 1040.5827263267429 estimated_peak_memory_range: min: 12288 - max: 17140208 + max: 22010320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp2k40wxp + job_id: jp2krzoqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1017.0 - throughput: 983.284169124877 + inference_time: 1019.0 + throughput: 981.3542688910696 estimated_peak_memory_range: - min: 0 - max: 15354160 + min: 1597440 + max: 21532112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgjvoz77g + job_id: jpv6lnmr5 job_status: Passed torchscript_onnx: - inference_time: 1326.0 - throughput: 754.1478129713424 + 
inference_time: 1332.0 + throughput: 750.7507507507507 estimated_peak_memory_range: min: 0 - max: 47670272 + max: 50233680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgn6olzv5 + job_id: jgdxdm46p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:45:14Z' + timestamp: '2024-12-11T23:10:35Z' - torchscript_onnx_tflite: - inference_time: 1007.0 - throughput: 993.0486593843099 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: min: 8192 - max: 13007072 + max: 13508992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpy1qrxrp + job_id: jpy1oy8lp job_status: Passed torchscript_onnx_qnn: - inference_time: 835.0 - throughput: 1197.6047904191616 + inference_time: 1012.0 + throughput: 988.1422924901186 estimated_peak_memory_range: min: 0 - max: 14303488 + max: 17185312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jped8ez75 + job_id: jgjvr8yeg job_status: Passed torchscript_onnx: - inference_time: 1300.0 - throughput: 769.2307692307693 + inference_time: 1067.0 + throughput: 937.207122774133 estimated_peak_memory_range: - min: 0 - max: 25428720 + min: 237568 + max: 25407504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jprvo8lvg + job_id: j57ye8nn5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:45:15Z' + timestamp: '2024-12-11T23:10:36Z' - torchscript_onnx_tflite: - 
inference_time: 1380.0 - throughput: 724.6376811594203 + inference_time: 1382.0 + throughput: 723.589001447178 estimated_peak_memory_range: - min: 12288 - max: 22442824 + min: 16384 + max: 23322304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp0zd3j25 + job_id: jp0zmxon5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1402.0 - throughput: 713.2667617689016 + inference_time: 1399.0 + throughput: 714.7962830593281 estimated_peak_memory_range: - min: 1654784 - max: 2839104 + min: 1634304 + max: 2883240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgz38omz5 + job_id: jped7nxv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:45:04Z' + timestamp: '2024-12-11T23:10:25Z' - torchscript_onnx_tflite: - inference_time: 53854.0 - throughput: 18.568722843242842 + inference_time: 53844.0 + throughput: 18.572171458286903 estimated_peak_memory_range: - min: 397312 - max: 13979600 + min: 364544 + max: 16061904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp8q60xzp + job_id: jp8qekjop job_status: Passed torchscript_onnx_qnn: - inference_time: 54108.0 - throughput: 18.481555407703112 + inference_time: 54075.0 + throughput: 18.49283402681461 estimated_peak_memory_range: - min: 1736704 - max: 7534976 + min: 1683456 + max: 11952320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jg9lkjmqg + job_id: j5welrzm5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - 
timestamp: '2024-11-26T01:45:06Z' + timestamp: '2024-12-11T23:10:27Z' - torchscript_onnx_tflite: - inference_time: 1389.0 - throughput: 719.9424046076314 + inference_time: 1384.0 + throughput: 722.543352601156 estimated_peak_memory_range: - min: 28672 - max: 22795312 + min: 20480 + max: 23326528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgkeo74yg + job_id: jgke2k6ng job_status: Passed torchscript_onnx_qnn: - inference_time: 1412.0 - throughput: 708.2152974504249 + inference_time: 1411.0 + throughput: 708.7172218284904 estimated_peak_memory_range: - min: 36864 - max: 1336704 + min: 28672 + max: 1300856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp147yjkp + job_id: jg9lzq28g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:45:07Z' + timestamp: '2024-12-11T23:10:28Z' - torchscript_onnx_tflite: inference_time: 2796.0 throughput: 357.653791130186 estimated_peak_memory_range: min: 16384 - max: 12920816 + max: 13710464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5q6zey7p + job_id: j5q6ld4op job_status: Passed torchscript_onnx_qnn: - inference_time: 3159.0 - throughput: 316.5558721114277 + inference_time: 2935.0 + throughput: 340.71550255536624 estimated_peak_memory_range: min: 0 - max: 5571056 + max: 5931680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgdx8e3kp + job_id: jp14nm17p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: 
'2024-11-26T01:45:08Z' + timestamp: '2024-12-11T23:10:30Z' - torchscript_onnx_tflite: inference_time: 1383.0 throughput: 723.0657989877079 estimated_peak_memory_range: - min: 20480 - max: 22582840 + min: 16384 + max: 207478960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jglvo6xe5 + job_id: jglvyq8m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1411.0 - throughput: 708.7172218284904 + inference_time: 1406.0 + throughput: 711.2375533428165 estimated_peak_memory_range: - min: 1662976 - max: 3007040 + min: 28672 + max: 1304808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j57yk04q5 + job_id: jgdxdm4zp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:45:09Z' + timestamp: '2024-12-11T23:10:31Z' - torchscript_onnx_tflite: - inference_time: 2874.0 - throughput: 347.9471120389701 + inference_time: 2843.0 + throughput: 351.74111853675697 estimated_peak_memory_range: - min: 12288 - max: 14917328 + min: 16384 + max: 18605136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j56yre7vp + job_id: j56y80myp job_status: Passed torchscript_onnx_qnn: - inference_time: 3090.0 - throughput: 323.62459546925567 + inference_time: 3049.0 + throughput: 327.97638570022957 estimated_peak_memory_range: - min: 1597440 - max: 7374048 + min: 1601536 + max: 7717760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jp4lmk1q5 + job_id: j5welrz45 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm 
chipset: SA8775P - timestamp: '2024-11-26T01:45:10Z' + timestamp: '2024-12-11T23:10:32Z' - torchscript_onnx_tflite: - inference_time: 2197.0 - throughput: 455.1661356395084 + inference_time: 2199.0 + throughput: 454.7521600727603 estimated_peak_memory_range: min: 16384 - max: 18622032 + max: 23357984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp3jxv9xg + job_id: jp3jzr7ng job_status: Passed torchscript_onnx_qnn: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2282.0 + throughput: 438.21209465381247 estimated_peak_memory_range: min: 1597440 - max: 21024800 + max: 26016704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jpxk3n4j5 + job_id: jg9lzq2mg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,7 +471,7 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:45:12Z' + timestamp: '2024-12-11T23:10:33Z' - torchscript_onnx_qnn: inference_time: 1583.0 throughput: 631.7119393556538 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j5we827z5 + job_id: jgz3l0yx5 job_status: Passed torchscript_onnx: - inference_time: 2260.0 - throughput: 442.4778761061947 + inference_time: 2236.0 + throughput: 447.2271914132379 estimated_peak_memory_range: - min: 7118848 - max: 7118848 + min: 7069696 + max: 7069696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp2k40rxp + job_id: jp4ly2425 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:45:16Z' + timestamp: '2024-12-11T23:10:37Z' diff --git 
a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml index 5d5f4b31..15ec08e2 100644 --- a/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet_quantized/perf.yaml @@ -47,14 +47,14 @@ aggregated: models: - name: Posenet-Mobilenet-Quantized universal_assets: - torchscript_onnx_tflite: mqy3d415m + torchscript_onnx_tflite: mm6kvpo6n performance_metrics: - torchscript_onnx_tflite: - inference_time: 566.0 - throughput: 1766.7844522968198 + inference_time: 564.0 + throughput: 1773.049645390071 estimated_peak_memory_range: - min: 12288 - max: 8488096 + min: 16384 + max: 13503672 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp147yvkp + job_id: jp14nmn7p job_status: Passed torchscript_onnx_qnn: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 413696 - max: 111524024 + min: 28672 + max: 8309936 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgkeo78yg + job_id: jgke2k2ng job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:44:17Z' + timestamp: '2024-12-11T23:09:39Z' - torchscript_onnx_tflite: - inference_time: 393.0 - throughput: 2544.529262086514 + inference_time: 391.0 + throughput: 2557.544757033248 estimated_peak_memory_range: min: 12288 - max: 23765712 + max: 27518864 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jgdx8ezkp + job_id: jgdxdmdzp job_status: Passed torchscript_onnx_qnn: - inference_time: 442.0 - throughput: 
2262.443438914027 + inference_time: 445.0 + throughput: 2247.191011235955 estimated_peak_memory_range: - min: 0 - max: 19855776 + min: 409600 + max: 20243616 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j5q6zev7p + job_id: j5q6ldlop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:44:18Z' + timestamp: '2024-12-11T23:09:40Z' - torchscript_onnx_tflite: - inference_time: 410.0 - throughput: 2439.0243902439024 + inference_time: 414.0 + throughput: 2415.458937198068 estimated_peak_memory_range: min: 8192 - max: 17579568 + max: 21595680 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: j57yk07q5 + job_id: j57ye8e95 job_status: Passed torchscript_onnx_qnn: - inference_time: 472.0 - throughput: 2118.64406779661 + inference_time: 380.0 + throughput: 2631.5789473684213 estimated_peak_memory_range: - min: 0 - max: 18157488 + min: 405504 + max: 22837808 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jglvo6le5 + job_id: jglvyqwm5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:44:19Z' + timestamp: '2024-12-11T23:09:41Z' - torchscript_onnx_tflite: - inference_time: 2288.0 - throughput: 437.06293706293707 + inference_time: 2226.0 + throughput: 449.23629829290206 estimated_peak_memory_range: min: 12288 - max: 22178528 + max: 25406048 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp4lmk9q5 + job_id: jp4ly2y15 
job_status: Passed torchscript_onnx_qnn: - inference_time: 3064.0 - throughput: 326.37075718015666 + inference_time: 2921.0 + throughput: 342.3485107839781 estimated_peak_memory_range: min: 413696 - max: 8621824 + max: 7783360 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j56yrewvp + job_id: j56y80oyp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T01:44:20Z' + timestamp: '2024-12-11T23:09:42Z' - torchscript_onnx_tflite: - inference_time: 12463.0 - throughput: 80.23750300890636 + inference_time: 13361.0 + throughput: 74.84469725319961 estimated_peak_memory_range: - min: 466944 - max: 7551720 + min: 12288 + max: 7722696 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +214,7 @@ models: layers_on_gpu: 3 layers_on_cpu: 0 total_layers: 48 - job_id: jpxk3ndj5 + job_id: jpxklzll5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -223,13 +223,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T01:44:08Z' + timestamp: '2024-12-11T23:09:31Z' - torchscript_onnx_tflite: - inference_time: 552.0 - throughput: 1811.5942028985507 + inference_time: 566.0 + throughput: 1766.7844522968198 estimated_peak_memory_range: min: 12288 - max: 11455824 + max: 8461776 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,14 +237,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: j5mnoqdyp + job_id: j5mn0l09p job_status: Passed torchscript_onnx_qnn: - inference_time: 556.0 - throughput: 1798.5611510791366 + inference_time: 564.0 + throughput: 1773.049645390071 estimated_peak_memory_range: min: 430080 - max: 1513888 + max: 1639016 primary_compute_unit: NPU precision: int8 layer_info: @@ -252,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
69 - job_id: jp3jxv6xg + job_id: jp3jzrong job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -261,13 +261,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:44:21Z' + timestamp: '2024-12-11T23:09:44Z' - torchscript_onnx_tflite: - inference_time: 7968.0 - throughput: 125.50200803212851 + inference_time: 7963.0 + throughput: 125.58081125204069 estimated_peak_memory_range: min: 90112 - max: 17865808 + max: 20483904 primary_compute_unit: NPU precision: int8 layer_info: @@ -275,14 +275,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jgn6olkv5 + job_id: jgn6zwzq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 8167.0 - throughput: 122.44398187829069 + inference_time: 8234.0 + throughput: 121.44765606023803 estimated_peak_memory_range: - min: 344064 - max: 5676576 + min: 339968 + max: 10724864 primary_compute_unit: NPU precision: int8 layer_info: @@ -290,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jpv6e0775 + job_id: jpv6ln2r5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -299,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:44:24Z' + timestamp: '2024-12-11T23:09:46Z' - torchscript_onnx_tflite: inference_time: 566.0 throughput: 1766.7844522968198 estimated_peak_memory_range: min: 12288 - max: 117117136 + max: 10589264 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,14 +313,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jprvo8wvg + job_id: jprvl7l7g job_status: Passed torchscript_onnx_qnn: - inference_time: 564.0 - throughput: 1773.049645390071 + inference_time: 562.0 + throughput: 1779.3594306049822 estimated_peak_memory_range: min: 425984 - max: 1672528 + max: 1648152 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +328,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: 
jgjvozq7g + job_id: jgjvr83eg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -337,13 +337,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:44:25Z' + timestamp: '2024-12-11T23:09:47Z' - torchscript_onnx_tflite: - inference_time: 1206.0 - throughput: 829.1873963515754 + inference_time: 1202.0 + throughput: 831.9467554076539 estimated_peak_memory_range: min: 12288 - max: 16728080 + max: 16287760 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,14 +351,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp2k40exp + job_id: jp2krzrqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1461.0 - throughput: 684.4626967830253 + inference_time: 1452.0 + throughput: 688.7052341597796 estimated_peak_memory_range: - min: 0 - max: 5901152 + min: 409600 + max: 6367536 primary_compute_unit: NPU precision: int8 layer_info: @@ -366,7 +366,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jped8ey75 + job_id: jped7n6v5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -375,13 +375,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:44:26Z' + timestamp: '2024-12-11T23:09:48Z' - torchscript_onnx_tflite: - inference_time: 569.0 - throughput: 1757.469244288225 + inference_time: 564.0 + throughput: 1773.049645390071 estimated_peak_memory_range: min: 12288 - max: 8191352 + max: 118269032 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,14 +389,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jpy1qrmrp + job_id: jpy1oyolp job_status: Passed torchscript_onnx_qnn: inference_time: 565.0 throughput: 1769.9115044247787 estimated_peak_memory_range: - min: 430080 - max: 2178336 + min: 421888 + max: 1692512 primary_compute_unit: NPU precision: int8 layer_info: @@ -404,7 +404,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgz38onz5 + 
job_id: jgz3l0zx5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -413,13 +413,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:44:27Z' + timestamp: '2024-12-11T23:09:49Z' - torchscript_onnx_tflite: - inference_time: 993.0 - throughput: 1007.0493454179255 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: min: 12288 - max: 16380432 + max: 21733280 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,14 +427,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp0zd3625 + job_id: jp0zmxmn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1173.0 - throughput: 852.5149190110827 + inference_time: 1194.0 + throughput: 837.5209380234506 estimated_peak_memory_range: min: 409600 - max: 6190576 + max: 6540000 primary_compute_unit: NPU precision: int8 layer_info: @@ -442,7 +442,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j5we824z5 + job_id: j5welrym5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -451,13 +451,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:44:29Z' + timestamp: '2024-12-11T23:09:50Z' - torchscript_onnx_tflite: - inference_time: 737.0 - throughput: 1356.85210312076 + inference_time: 744.0 + throughput: 1344.0860215053763 estimated_peak_memory_range: min: 12288 - max: 21621824 + max: 27249056 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,14 +465,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 48 - job_id: jp8q601zp + job_id: jp8qekeop job_status: Passed torchscript_onnx_qnn: - inference_time: 798.0 - throughput: 1253.1328320802006 + inference_time: 804.0 + throughput: 1243.7810945273632 estimated_peak_memory_range: min: 409600 - max: 23007312 + max: 23179792 primary_compute_unit: NPU precision: int8 layer_info: @@ -480,7 +480,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: 
jg9lkjdqg + job_id: jg9lzqo8g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -489,10 +489,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:44:30Z' + timestamp: '2024-12-11T23:09:52Z' - torchscript_onnx_qnn: - inference_time: 685.0 - throughput: 1459.85401459854 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: min: 397312 max: 397312 @@ -503,7 +503,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jgo2ok84p + job_id: jgo2l9dkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -512,4 +512,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:44:23Z' + timestamp: '2024-12-11T23:09:45Z' diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index 07a3a031..12e828cb 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: QuickSRNetLarge universal_assets: - torchscript_onnx_tflite: mq8k1vdvq - torchscript_onnx: mq247p1dm + torchscript_onnx_tflite: mnl6vyekn + torchscript_onnx: mq36er8lq performance_metrics: - torchscript_onnx_tflite: - inference_time: 2430.0 - throughput: 411.52263374485597 + inference_time: 2416.0 + throughput: 413.9072847682119 estimated_peak_memory_range: - min: 24576 - max: 4853856 + min: 28672 + max: 5070360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j5we82845 + job_id: jpv6lndr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2115.0 - throughput: 472.8132387706856 + inference_time: 2114.0 + throughput: 473.0368968779565 estimated_peak_memory_range: - min: 217088 - max: 76502976 + min: 24576 + max: 6090640 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpxk3n3j5 + job_id: jpxklz4l5 job_status: Passed torchscript_onnx: - inference_time: 2745.0 - throughput: 364.29872495446267 + inference_time: 2769.0 + throughput: 361.14120621162874 estimated_peak_memory_range: - min: 212992 - max: 1650584 + min: 24576 + max: 2180616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: j56yredvp + job_id: j56y808yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:43:38Z' + timestamp: '2024-12-11T23:09:02Z' - torchscript_onnx_tflite: - inference_time: 1733.0 - throughput: 577.0340450086555 + inference_time: 1820.0 + throughput: 549.4505494505495 estimated_peak_memory_range: min: 16384 - max: 12999392 + max: 19790352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jg9lkjkmg + job_id: jgjvr87eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1498.0 - throughput: 667.5567423230974 + inference_time: 1497.0 + throughput: 668.002672010688 estimated_peak_memory_range: min: 208896 - max: 11041312 + max: 13148416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5mnoqoyp + job_id: j5mn0lm9p job_status: Passed torchscript_onnx: - inference_time: 2181.0 - throughput: 458.50527281063734 + inference_time: 2191.0 + throughput: 456.41259698767686 estimated_peak_memory_range: min: 0 - max: 35656352 + max: 36523232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jp3jxvwxg + job_id: jp3jzrzng job_status: Passed reference_device_info: name: 
Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:43:39Z' + timestamp: '2024-12-11T23:09:03Z' - torchscript_onnx_tflite: - inference_time: 1986.0 - throughput: 503.5246727089627 + inference_time: 1791.0 + throughput: 558.3472920156337 estimated_peak_memory_range: min: 12288 - max: 11074816 + max: 11266224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jp147y7np + job_id: jped7nzv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1329.0 - throughput: 752.4454477050414 + inference_time: 1562.0 + throughput: 640.2048655569782 estimated_peak_memory_range: min: 0 - max: 10721840 + max: 9851408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgn6ol7v5 + job_id: jgn6zwnq5 job_status: Passed torchscript_onnx: - inference_time: 2285.0 - throughput: 437.636761487965 + inference_time: 2311.0 + throughput: 432.7131112072696 estimated_peak_memory_range: min: 0 - max: 15852864 + max: 16574512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jgo2ok44p + job_id: jgo2l9lkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:43:40Z' + timestamp: '2024-12-11T23:09:04Z' - torchscript_onnx_tflite: - inference_time: 2448.0 - throughput: 408.4967320261438 + inference_time: 2516.0 + throughput: 397.456279809221 estimated_peak_memory_range: - min: 28672 - max: 4025344 + min: 16384 + max: 12021848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jgdx8e86p + job_id: 
jgz3l0mx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2199.0 - throughput: 454.7521600727603 + inference_time: 2208.0 + throughput: 452.8985507246377 estimated_peak_memory_range: - min: 229376 - max: 1695232 + min: 249856 + max: 1583880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jprvo8nvg + job_id: jprvl707g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:43:29Z' + timestamp: '2024-12-11T23:08:52Z' - torchscript_onnx_tflite: - inference_time: 90980.0 - throughput: 10.991426687183996 + inference_time: 89919.0 + throughput: 11.121120119218407 estimated_peak_memory_range: - min: 6356992 - max: 17914304 + min: 6361088 + max: 19317680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j5we828z5 + job_id: j5welr7m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 87671.0 - throughput: 11.40628029793204 + inference_time: 87661.0 + throughput: 11.40758147865071 estimated_peak_memory_range: - min: 413696 - max: 6188688 + min: 274432 + max: 10776800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpy1qr7rp + job_id: jpy1oyxlp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:43:32Z' + timestamp: '2024-12-11T23:08:54Z' - torchscript_onnx_tflite: - inference_time: 2460.0 - throughput: 406.5040650406504 + inference_time: 2464.0 + throughput: 405.84415584415586 estimated_peak_memory_range: - min: 3502080 - max: 8494064 + min: 16384 + max: 6173808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 
+295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jg9lkjkqg + job_id: jg9lzqm8g job_status: Passed torchscript_onnx_qnn: - inference_time: 2190.0 - throughput: 456.62100456621005 + inference_time: 2221.0 + throughput: 450.24763619990995 estimated_peak_memory_range: - min: 233472 - max: 1620160 + min: 217088 + max: 1490496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp0zd3v25 + job_id: jp0zmxjn5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:43:33Z' + timestamp: '2024-12-11T23:08:56Z' - torchscript_onnx_tflite: - inference_time: 4719.0 - throughput: 211.90930281839374 + inference_time: 6465.0 + throughput: 154.67904098994586 estimated_peak_memory_range: min: 6307840 - max: 15258800 + max: 15411984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jp147y7kp + job_id: jp14nmj7p job_status: Passed torchscript_onnx_qnn: - inference_time: 4824.0 - throughput: 207.29684908789386 + inference_time: 4059.0 + throughput: 246.3661000246366 estimated_peak_memory_range: - min: 221184 - max: 6203984 + min: 0 + max: 6071712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp8q604zp + job_id: jp8qekxop job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:43:34Z' + timestamp: '2024-12-11T23:08:57Z' - torchscript_onnx_tflite: - inference_time: 2445.0 - throughput: 408.9979550102249 + inference_time: 2416.0 + throughput: 413.9072847682119 estimated_peak_memory_range: min: 16384 - max: 64857640 + max: 5399992 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jgdx8e8kp + job_id: jgdxdm3zp job_status: Passed torchscript_onnx_qnn: - inference_time: 2224.0 - throughput: 449.64028776978415 + inference_time: 2260.0 + throughput: 442.4778761061947 estimated_peak_memory_range: - min: 24576 - max: 4270440 + min: 225280 + max: 1412040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgkeo79yg + job_id: jgke2k4ng job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:43:35Z' + timestamp: '2024-12-11T23:08:58Z' - torchscript_onnx_tflite: - inference_time: 5227.0 - throughput: 191.3143294432753 + inference_time: 5283.0 + throughput: 189.28639030853682 estimated_peak_memory_range: - min: 6328320 - max: 15861424 + min: 6311936 + max: 20295312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j57yk0kq5 + job_id: j57ye8495 job_status: Passed torchscript_onnx_qnn: - inference_time: 5025.0 - throughput: 199.00497512437812 + inference_time: 4989.0 + throughput: 200.44097013429544 estimated_peak_memory_range: - min: 0 - max: 5907472 + min: 212992 + max: 6416320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5q6zem7p + job_id: j5q6ldyop job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:43:36Z' + timestamp: '2024-12-11T23:08:59Z' - torchscript_onnx_tflite: - inference_time: 4453.0 - throughput: 224.56770716370985 + inference_time: 4826.0 + throughput: 207.21094073767094 
estimated_peak_memory_range: - min: 6316032 - max: 20532672 + min: 6311936 + max: 22104656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jp4lmkmq5 + job_id: jp4ly2115 job_status: Passed torchscript_onnx_qnn: - inference_time: 3448.0 - throughput: 290.0232018561485 + inference_time: 3529.0 + throughput: 283.36639274582035 estimated_peak_memory_range: - min: 212992 - max: 15848176 + min: 208896 + max: 16052800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jglvo61e5 + job_id: jglvyqym5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:43:37Z' + timestamp: '2024-12-11T23:09:01Z' - torchscript_onnx_qnn: - inference_time: 2426.0 - throughput: 412.20115416323165 + inference_time: 2411.0 + throughput: 414.765657403567 estimated_peak_memory_range: - min: 221184 - max: 221184 + min: 237568 + max: 237568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp2k40vxp + job_id: jp2krzwqp job_status: Passed torchscript_onnx: - inference_time: 2704.0 - throughput: 369.8224852071006 + inference_time: 2689.0 + throughput: 371.8854592785422 estimated_peak_memory_range: - min: 8937472 - max: 8937472 + min: 8851456 + max: 8851456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jpv6e0975 + job_id: jpv6lnlr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:43:41Z' + timestamp: '2024-12-11T23:09:05Z' diff --git 
a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml index 9474c26d..0abef004 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -9,6 +9,7 @@ description: QuickSRNet Large is designed for upscaling images on mobile platfor use_case: Super Resolution tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index f0828d75..e6f6babc 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: QuickSRNetLarge-Quantized universal_assets: - torchscript_onnx_tflite: mnzv4r7zq - torchscript_onnx: mq3e14krm + torchscript_onnx_tflite: mq214od0m + torchscript_onnx: mnz1vzeoq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1439.0 - throughput: 694.9270326615705 + inference_time: 1489.0 + throughput: 671.591672263264 estimated_peak_memory_range: - min: 20480 - max: 5580736 + min: 16384 + max: 4950336 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jgn6or3k5 + job_id: jgo2l0rqp job_status: Passed torchscript_onnx_qnn: - inference_time: 910.0 - throughput: 1098.901098901099 + inference_time: 914.0 + throughput: 1094.0919037199126 estimated_peak_memory_range: - min: 24576 - max: 8310496 + min: 28672 + max: 3923352 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpv6ezwk5 + job_id: j5mn020dp job_status: Passed 
torchscript_onnx: - inference_time: 16891.0 - throughput: 59.203125925048845 + inference_time: 16531.0 + throughput: 60.492408202770555 estimated_peak_memory_range: - min: 25055232 - max: 246105952 + min: 24424448 + max: 27225664 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 27 total_layers: 144 - job_id: jgn6orek5 + job_id: jgn6zy1k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:57:52Z' + timestamp: '2024-12-12T01:26:52Z' - torchscript_onnx_tflite: - inference_time: 1105.0 - throughput: 904.9773755656108 + inference_time: 1108.0 + throughput: 902.5270758122743 estimated_peak_memory_range: - min: 20480 - max: 14550784 + min: 16384 + max: 16351424 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jprvo1e0g + job_id: jpv6lolk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: min: 12288 - max: 13007824 + max: 13750624 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgjvoklvg + job_id: jgn6zyzk5 job_status: Passed torchscript_onnx: - inference_time: 14179.0 - throughput: 70.52683546089287 + inference_time: 13984.0 + throughput: 71.51029748283753 estimated_peak_memory_range: - min: 26238976 - max: 107013264 + min: 26374144 + max: 107630336 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 27 total_layers: 144 - job_id: jprvo1y0g + job_id: jprvlqx0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:57:55Z' + timestamp: '2024-12-12T01:26:55Z' - torchscript_onnx_tflite: - inference_time: 1296.0 - throughput: 771.604938271605 + inference_time: 1282.0 + throughput: 780.0312012480499 estimated_peak_memory_range: min: 12288 - max: 11865520 + max: 12794288 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp2k43lrp + job_id: jgjvrmrvg job_status: Passed torchscript_onnx_qnn: - inference_time: 553.0 - throughput: 1808.3182640144666 + inference_time: 659.0 + throughput: 1517.4506828528072 estimated_peak_memory_range: min: 8192 - max: 11959456 + max: 11342032 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jped84vo5 + job_id: jprvlql0g job_status: Passed torchscript_onnx: - inference_time: 13266.0 - throughput: 75.38067239559777 + inference_time: 13269.0 + throughput: 75.36362951239732 estimated_peak_memory_range: - min: 23318528 - max: 74787424 + min: 28213248 + max: 81496320 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 27 total_layers: 144 - job_id: jp2k43mrp + job_id: jp2kr6orp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:57:58Z' + timestamp: '2024-12-12T01:26:57Z' - torchscript_onnx_tflite: - inference_time: 4171.0 - throughput: 239.7506593143131 + inference_time: 4000.0 + throughput: 250.0 estimated_peak_memory_range: - min: 1585152 - max: 16237344 + min: 1589248 + max: 15667296 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jpy1qv68p + job_id: jped717o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 
3078.0 - throughput: 324.8862897985705 + inference_time: 3135.0 + throughput: 318.9792663476874 estimated_peak_memory_range: - min: 12288 - max: 7710992 + min: 32768 + max: 7362768 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgz38v7o5 + job_id: jp2kr6rrp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:57:27Z' + timestamp: '2024-12-12T01:26:27Z' - torchscript_onnx_tflite: - inference_time: 37582.0 - throughput: 26.60848278431164 + inference_time: 35319.0 + throughput: 28.313372405787252 estimated_peak_memory_range: - min: 1794048 - max: 9659240 + min: 1314816 + max: 3689232 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp0zdel95 + job_id: jgz3l9lo5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:56:57Z' + timestamp: '2024-12-12T01:25:56Z' - torchscript_onnx_tflite: - inference_time: 1436.0 - throughput: 696.3788300835655 + inference_time: 1453.0 + throughput: 688.2312456985547 estimated_peak_memory_range: min: 16384 - max: 3711296 + max: 5307024 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp8q6wzkp + job_id: j5welvl35 job_status: Passed torchscript_onnx_qnn: - inference_time: 679.0 - throughput: 1472.7540500736377 + inference_time: 683.0 + throughput: 1464.1288433382138 estimated_peak_memory_range: - min: 81920 - max: 1565064 + min: 86016 + max: 1208552 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5we8m935 + 
job_id: jpy1owo8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:57:30Z' + timestamp: '2024-12-12T01:26:30Z' - torchscript_onnx_tflite: - inference_time: 14180.0 - throughput: 70.52186177715092 + inference_time: 13902.0 + throughput: 71.9320961012804 estimated_peak_memory_range: - min: 1617920 - max: 12679376 + min: 1605632 + max: 15615968 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jgkeor3wg + job_id: jg9lz1zwg job_status: Passed torchscript_onnx_qnn: - inference_time: 11883.0 - throughput: 84.15383320710258 + inference_time: 11906.0 + throughput: 83.99126490844952 estimated_peak_memory_range: - min: 40960 - max: 5829136 + min: 36864 + max: 10600848 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp147q88p + job_id: jp8qe9ekp job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:57:36Z' + timestamp: '2024-12-12T01:26:35Z' - torchscript_onnx_tflite: - inference_time: 1446.0 - throughput: 691.5629322268327 + inference_time: 1456.0 + throughput: 686.8131868131868 estimated_peak_memory_range: - min: 16384 - max: 82737296 + min: 20480 + max: 5189760 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: j5q6z93np + job_id: jp14nln8p job_status: Passed torchscript_onnx_qnn: - inference_time: 682.0 - throughput: 1466.275659824047 + inference_time: 679.0 + throughput: 1472.7540500736377 estimated_peak_memory_range: min: 81920 - max: 1418096 + max: 1226272 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgdx87vrp + job_id: jgke2n2wg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:57:38Z' + timestamp: '2024-12-12T01:26:38Z' - torchscript_onnx_tflite: - inference_time: 2491.0 - throughput: 401.4452027298274 + inference_time: 2608.0 + throughput: 383.4355828220859 estimated_peak_memory_range: - min: 16384 - max: 10472160 + min: 20480 + max: 10099312 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jglvoe3j5 + job_id: jgdxd9drp job_status: Passed torchscript_onnx_qnn: - inference_time: 1480.0 - throughput: 675.6756756756756 + inference_time: 1487.0 + throughput: 672.4949562878278 estimated_peak_memory_range: - min: 24576 - max: 5787424 + min: 16384 + max: 5721280 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j57ykvdv5 + job_id: jgo2l0lqp job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:57:41Z' + timestamp: '2024-12-12T01:26:41Z' - torchscript_onnx_tflite: - inference_time: 1451.0 - throughput: 689.1798759476223 + inference_time: 1479.0 + throughput: 676.132521974307 estimated_peak_memory_range: min: 16384 - max: 4530136 + max: 55455888 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: j56yrqn6p + job_id: j57yewev5 job_status: Passed torchscript_onnx_qnn: - inference_time: 681.0 - throughput: 1468.4287812041116 + inference_time: 696.0 + throughput: 1436.7816091954023 estimated_peak_memory_range: - min: 73728 - max: 1236744 + min: 81920 + max: 1611056 
primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp4lmjw85 + job_id: jg9lz1owg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:57:44Z' + timestamp: '2024-12-12T01:26:44Z' - torchscript_onnx_tflite: - inference_time: 2577.0 - throughput: 388.04811796662784 + inference_time: 2609.0 + throughput: 383.28861632809503 estimated_peak_memory_range: - min: 626688 - max: 10794288 + min: 249856 + max: 15402400 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jp3jxqe3g + job_id: jp4lyoy85 job_status: Passed torchscript_onnx_qnn: - inference_time: 1249.0 - throughput: 800.640512409928 + inference_time: 1266.0 + throughput: 789.8894154818325 estimated_peak_memory_range: - min: 16384 - max: 5816768 + min: 12288 + max: 5960096 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpxk3e135 + job_id: jpxklj035 job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:57:47Z' + timestamp: '2024-12-12T01:26:47Z' - torchscript_onnx_tflite: - inference_time: 2317.0 - throughput: 431.59257660768236 + inference_time: 1828.0 + throughput: 547.0459518599563 estimated_peak_memory_range: - min: 20480 - max: 15001040 + min: 16384 + max: 16803376 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 33 - job_id: jgo2oe3qp + job_id: jpxkljl35 job_status: Passed torchscript_onnx_qnn: - inference_time: 1060.0 - throughput: 943.3962264150944 + inference_time: 1057.0 + throughput: 946.073793755913 
estimated_peak_memory_range: min: 12288 - max: 14439840 + max: 14093312 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5mnovzdp + job_id: j5mn029dp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:57:50Z' + timestamp: '2024-12-12T01:26:49Z' - torchscript_onnx_qnn: - inference_time: 798.0 - throughput: 1253.1328320802006 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: - min: 135168 - max: 135168 + min: 102400 + max: 102400 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,7 +552,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jg9lk94wg + job_id: jp0zmqm95 + job_status: Passed + torchscript_onnx: + inference_time: 17347.0 + throughput: 57.646855364039894 + estimated_peak_memory_range: + min: 34959360 + max: 34959360 + primary_compute_unit: NPU + precision: int8 + layer_info: + layers_on_npu: 117 + layers_on_gpu: 0 + layers_on_cpu: 27 + total_layers: 144 + job_id: jpy1ow88p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -561,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:58:01Z' + timestamp: '2024-12-12T01:27:00Z' diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index ba840900..faa9f9d6 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: QuickSRNetMedium universal_assets: - torchscript_onnx_tflite: mqy3d43xm - torchscript_onnx: mmdyrld3m + torchscript_onnx_tflite: mq2142wlm + torchscript_onnx: mn0jxy1xm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1422.0 - throughput: 
703.2348804500704 + inference_time: 1320.0 + throughput: 757.5757575757576 estimated_peak_memory_range: - min: 32768 - max: 2762616 + min: 20480 + max: 22583792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j56yrevnp + job_id: j56y80d6p job_status: Passed torchscript_onnx_qnn: - inference_time: 980.0 - throughput: 1020.4081632653061 + inference_time: 1085.0 + throughput: 921.6589861751152 estimated_peak_memory_range: - min: 212992 - max: 63293688 + min: 217088 + max: 19535664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j57yk06n5 + job_id: j5welr4m5 job_status: Passed torchscript_onnx: - inference_time: 1496.0 - throughput: 668.4491978609626 + inference_time: 1525.0 + throughput: 655.7377049180328 estimated_peak_memory_range: - min: 217088 - max: 1708416 + min: 12288 + max: 8901296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: j5q6zezep + job_id: jpy1oymlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:42:54Z' + timestamp: '2024-12-11T23:08:18Z' - torchscript_onnx_tflite: - inference_time: 908.0 - throughput: 1101.3215859030836 + inference_time: 853.0 + throughput: 1172.3329425556858 estimated_peak_memory_range: min: 16384 - max: 12504624 + max: 11359456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jgo2okm1p + job_id: jp3jzrw3g job_status: Passed torchscript_onnx_qnn: - inference_time: 652.0 - throughput: 1533.7423312883436 + inference_time: 663.0 + throughput: 1508.2956259426849 estimated_peak_memory_range: - min: 208896 - max: 11830368 + 
min: 0 + max: 15012544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp4lmk825 + job_id: jg9lzqd8g job_status: Passed torchscript_onnx: - inference_time: 1040.0 - throughput: 961.5384615384615 + inference_time: 1113.0 + throughput: 898.4725965858041 estimated_peak_memory_range: min: 0 - max: 24697344 + max: 25669760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jglvo6o25 + job_id: jp0zmx6n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:42:55Z' + timestamp: '2024-12-11T23:08:19Z' - torchscript_onnx_tflite: - inference_time: 1001.0 - throughput: 999.000999000999 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: min: 12288 - max: 8895520 + max: 8603664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jpv6e04z5 + job_id: jgo2l94qp job_status: Passed torchscript_onnx_qnn: - inference_time: 691.0 - throughput: 1447.178002894356 + inference_time: 602.0 + throughput: 1661.1295681063123 estimated_peak_memory_range: min: 0 - max: 8772480 + max: 8987520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpxk3nm85 + job_id: jp14nm67p job_status: Passed torchscript_onnx: - inference_time: 916.0 - throughput: 1091.703056768559 + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: min: 0 - max: 15893120 + max: 15566560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: j56yrernp + job_id: jp8qek1op 
job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:42:56Z' + timestamp: '2024-12-11T23:08:20Z' - torchscript_onnx_tflite: - inference_time: 1350.0 - throughput: 740.7407407407408 + inference_time: 1390.0 + throughput: 719.4244604316547 estimated_peak_memory_range: - min: 24576 - max: 2705616 + min: 3162112 + max: 27011960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jgjvoz11g + job_id: jpv6ln7k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 906.0 - throughput: 1103.7527593818984 + inference_time: 911.0 + throughput: 1097.694840834248 estimated_peak_memory_range: - min: 221184 - max: 1480240 + min: 229376 + max: 1517392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j5mnoq47p + job_id: jgdxdm2zp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:42:45Z' + timestamp: '2024-12-11T23:08:09Z' - torchscript_onnx_tflite: - inference_time: 16142.0 - throughput: 61.950192045595344 + inference_time: 16304.0 + throughput: 61.33464180569185 estimated_peak_memory_range: - min: 6430720 - max: 16673520 + min: 0 + max: 12415664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jped8e285 + job_id: jgjvr8qvg job_status: Passed torchscript_onnx_qnn: - inference_time: 14141.0 - throughput: 70.71635669330315 + inference_time: 14023.0 + throughput: 71.31141695785496 estimated_peak_memory_range: - min: 180224 - max: 6019088 + min: 176128 + max: 10601520 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jprvo89kg + job_id: jp4ly2315 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:42:48Z' + timestamp: '2024-12-11T23:08:11Z' - torchscript_onnx_tflite: - inference_time: 1300.0 - throughput: 769.2307692307693 + inference_time: 1417.0 + throughput: 705.7163020465773 estimated_peak_memory_range: - min: 20480 - max: 2825408 + min: 16384 + max: 2747944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jgz38ow45 + job_id: jped7nyo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 936.0 - throughput: 1068.3760683760684 + inference_time: 959.0 + throughput: 1042.752867570386 estimated_peak_memory_range: - min: 225280 - max: 1438952 + min: 229376 + max: 1632896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp2k40j6p + job_id: jpxklzxl5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:42:49Z' + timestamp: '2024-12-11T23:08:12Z' - torchscript_onnx_tflite: - inference_time: 4365.0 - throughput: 229.0950744558992 + inference_time: 2119.0 + throughput: 471.92071731949034 estimated_peak_memory_range: - min: 6311936 - max: 13577136 + min: 16384 + max: 8178928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j5we82x45 + job_id: jgz3l0no5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2517.0 - throughput: 397.29837107667856 + inference_time: 1842.0 + throughput: 542.8881650380022 estimated_peak_memory_range: min: 0 - max: 5761680 + max: 
6079456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpy1qrn0p + job_id: j5mn0l89p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:42:50Z' + timestamp: '2024-12-11T23:08:13Z' - torchscript_onnx_tflite: - inference_time: 1332.0 - throughput: 750.7507507507507 + inference_time: 1365.0 + throughput: 732.6007326007326 estimated_peak_memory_range: - min: 28672 - max: 2641304 + min: 32768 + max: 2788176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jg9lkj8mg + job_id: j5welr435 job_status: Passed torchscript_onnx_qnn: - inference_time: 915.0 - throughput: 1092.896174863388 + inference_time: 940.0 + throughput: 1063.8297872340424 estimated_peak_memory_range: - min: 229376 - max: 1458232 + min: 221184 + max: 1424640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp0zd3d05 + job_id: jgn6zwkq5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:42:51Z' + timestamp: '2024-12-11T23:08:14Z' - torchscript_onnx_tflite: - inference_time: 2414.0 - throughput: 414.25020712510354 + inference_time: 2502.0 + throughput: 399.68025579536373 estimated_peak_memory_range: - min: 421888 - max: 7994128 + min: 860160 + max: 8312816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jp147y3np + job_id: jg9lzqdwg job_status: Passed torchscript_onnx_qnn: - inference_time: 1944.0 - throughput: 514.40329218107 + inference_time: 1869.0 + throughput: 
535.0454788657036 estimated_peak_memory_range: - min: 0 - max: 5887248 + min: 212992 + max: 6529152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp8q606qp + job_id: jprvl7w7g job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:42:52Z' + timestamp: '2024-12-11T23:08:16Z' - torchscript_onnx_tflite: - inference_time: 1970.0 - throughput: 507.61421319796955 + inference_time: 1991.0 + throughput: 502.26017076845807 estimated_peak_memory_range: - min: 20480 - max: 11521456 + min: 8515584 + max: 21085504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jgdx8e06p + job_id: jp14nm68p job_status: Passed torchscript_onnx_qnn: - inference_time: 1236.0 - throughput: 809.0614886731391 + inference_time: 1257.0 + throughput: 795.5449482895783 estimated_peak_memory_range: - min: 208896 - max: 14316656 + min: 204800 + max: 15329072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgkeo7ovg + job_id: jp2krzeqp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:42:53Z' + timestamp: '2024-12-11T23:08:17Z' - torchscript_onnx_qnn: - inference_time: 1561.0 - throughput: 640.6149903907751 + inference_time: 1023.0 + throughput: 977.5171065493646 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgn6olxj5 + job_id: j57ye8995 job_status: Passed torchscript_onnx: - inference_time: 
1522.0 - throughput: 657.030223390276 + inference_time: 1526.0 + throughput: 655.307994757536 estimated_peak_memory_range: - min: 8830976 - max: 8830976 + min: 8962048 + max: 8962048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jp3jxvxmg + job_id: jgke2k8ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:42:57Z' + timestamp: '2024-12-11T23:08:21Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml index 36912e01..2cf33854 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -9,6 +9,7 @@ description: QuickSRNet Medium is designed for upscaling images on mobile platfo use_case: Super Resolution tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index ded963cc..8bfc24ad 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: QuickSRNetMedium-Quantized universal_assets: - torchscript_onnx_tflite: mnwe1g3pn - torchscript_onnx: mnjxkllyq + torchscript_onnx_tflite: mn4l1e60q + torchscript_onnx: mq214oylm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1197.0 - throughput: 835.421888053467 + inference_time: 1117.0 + throughput: 895.2551477170994 estimated_peak_memory_range: - min: 24576 - max: 6727792 + min: 20480 + max: 2331288 
primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpv6ezrk5 + job_id: jpy1owm7p job_status: Passed torchscript_onnx_qnn: - inference_time: 517.0 - throughput: 1934.2359767891683 + inference_time: 511.0 + throughput: 1956.9471624266146 estimated_peak_memory_range: - min: 77824 - max: 10900992 + min: 73728 + max: 10821808 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpy1qvj8p + job_id: jgz3l9nk5 job_status: Passed torchscript_onnx: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 6354.0 + throughput: 157.38117721120554 estimated_peak_memory_range: - min: 12967936 - max: 14999312 + min: 13004800 + max: 14887352 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 14 total_layers: 75 - job_id: j5we8mq35 + job_id: jprvlq00g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:56:11Z' + timestamp: '2024-12-12T01:25:10Z' - torchscript_onnx_tflite: - inference_time: 909.0 - throughput: 1100.1100110011 + inference_time: 899.0 + throughput: 1112.3470522803113 estimated_peak_memory_range: - min: 20480 - max: 12506432 + min: 12288 + max: 11383472 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgjvok2vg + job_id: jp0zmq665 job_status: Passed torchscript_onnx_qnn: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 359.0 + throughput: 2785.515320334262 estimated_peak_memory_range: - min: 65536 - max: 11345360 + min: 0 + max: 15474784 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 17 - job_id: jp0zde295 + job_id: j5welv765 job_status: Passed torchscript_onnx: - inference_time: 4520.0 - throughput: 221.23893805309734 + inference_time: 4560.0 + throughput: 219.2982456140351 estimated_peak_memory_range: - min: 13234176 - max: 48197888 + min: 15450112 + max: 50115136 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 14 total_layers: 75 - job_id: jg9lk9wwg + job_id: jp2kr6wrp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:56:14Z' + timestamp: '2024-12-12T01:25:14Z' - torchscript_onnx_tflite: - inference_time: 1396.0 - throughput: 716.3323782234957 + inference_time: 896.0 + throughput: 1116.0714285714287 estimated_peak_memory_range: - min: 12288 - max: 9057888 + min: 16384 + max: 9173264 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jped84wo5 + job_id: jp8qe91xp job_status: Passed torchscript_onnx_qnn: - inference_time: 296.0 - throughput: 3378.3783783783783 + inference_time: 351.0 + throughput: 2849.002849002849 estimated_peak_memory_range: - min: 57344 - max: 10028032 + min: 61440 + max: 9936304 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp8q6wmkp + job_id: jg9lz1mlg job_status: Passed torchscript_onnx: - inference_time: 4203.0 - throughput: 237.92529145848204 + inference_time: 4199.0 + throughput: 238.15194093831866 estimated_peak_memory_range: - min: 13103104 - max: 34552672 + min: 14331904 + max: 36475808 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 14 total_layers: 75 - job_id: jp147qe8p + job_id: jpy1owx8p job_status: Passed reference_device_info: name: 
Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:56:16Z' + timestamp: '2024-12-12T01:25:16Z' - torchscript_onnx_tflite: - inference_time: 2337.0 - throughput: 427.89901583226356 + inference_time: 2368.0 + throughput: 422.2972972972973 estimated_peak_memory_range: - min: 12288 - max: 11724880 + min: 24576 + max: 12186096 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgz38vjo5 + job_id: jgke2n82g job_status: Passed torchscript_onnx_qnn: - inference_time: 1046.0 - throughput: 956.0229445506692 + inference_time: 1091.0 + throughput: 916.5902841429881 estimated_peak_memory_range: - min: 61440 - max: 7837872 + min: 20480 + max: 12187680 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgkeorqwg + job_id: jp14nlj2p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:55:46Z' + timestamp: '2024-12-12T01:24:45Z' - torchscript_onnx_tflite: - inference_time: 12837.0 - throughput: 77.8998208304121 + inference_time: 13028.0 + throughput: 76.75775253300583 estimated_peak_memory_range: - min: 1757184 - max: 9225904 + min: 2027520 + max: 4317760 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j5we8m335 + job_id: j5q6lkv4p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:55:15Z' + timestamp: '2024-12-12T01:24:14Z' - torchscript_onnx_tflite: - inference_time: 1125.0 - throughput: 888.8888888888889 + inference_time: 1119.0 + throughput: 
893.6550491510277 estimated_peak_memory_range: - min: 28672 - max: 2290392 + min: 16384 + max: 61958624 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jg9lk9ywg + job_id: jglvyzl85 job_status: Passed torchscript_onnx_qnn: inference_time: 410.0 throughput: 2439.0243902439024 estimated_peak_memory_range: - min: 73728 - max: 1389752 + min: 69632 + max: 1584328 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jglvoe2j5 + job_id: jgdxd93ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:55:49Z' + timestamp: '2024-12-12T01:24:48Z' - torchscript_onnx_tflite: - inference_time: 4580.0 - throughput: 218.34061135371178 + inference_time: 4577.0 + throughput: 218.4837229626393 estimated_peak_memory_range: - min: 1679360 - max: 11209152 + min: 1597440 + max: 14440592 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp147qw8p + job_id: j56y8jw0p job_status: Passed torchscript_onnx_qnn: - inference_time: 2764.0 - throughput: 361.794500723589 + inference_time: 2741.0 + throughput: 364.8303538854433 estimated_peak_memory_range: min: 12288 - max: 5521072 + max: 10456400 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jp3jxq13g + job_id: jg9lz1mwg job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:55:54Z' + timestamp: '2024-12-12T01:24:54Z' - torchscript_onnx_tflite: - inference_time: 1122.0 - throughput: 891.2655971479501 + inference_time: 1113.0 + 
throughput: 898.4725965858041 estimated_peak_memory_range: - min: 20480 - max: 3324320 + min: 16384 + max: 2554976 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgdx87qrp + job_id: jp3jz36lg job_status: Passed torchscript_onnx_qnn: - inference_time: 414.0 - throughput: 2415.458937198068 + inference_time: 415.0 + throughput: 2409.6385542168673 estimated_peak_memory_range: - min: 81920 - max: 1324432 + min: 86016 + max: 1439216 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgo2oenqp + job_id: jp14nlj8p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:55:57Z' + timestamp: '2024-12-12T01:24:56Z' - torchscript_onnx_tflite: - inference_time: 1909.0 - throughput: 523.8344683080147 + inference_time: 1950.0 + throughput: 512.8205128205128 estimated_peak_memory_range: - min: 16384 - max: 8545744 + min: 1593344 + max: 10020000 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp4lmjd85 + job_id: jgo2l08xp job_status: Passed torchscript_onnx_qnn: - inference_time: 897.0 - throughput: 1114.8272017837235 + inference_time: 919.0 + throughput: 1088.139281828074 estimated_peak_memory_range: - min: 12288 - max: 5813952 + min: 16384 + max: 5673504 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jpv6ezvk5 + job_id: jgdxd93rp job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:56:00Z' + timestamp: '2024-12-12T01:24:59Z' - torchscript_onnx_tflite: - 
inference_time: 1111.0 - throughput: 900.0900090009001 + inference_time: 1125.0 + throughput: 888.8888888888889 estimated_peak_memory_range: - min: 16384 - max: 7917704 + min: 8192 + max: 2420024 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j5mnov6dp + job_id: jpv6lo7j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 441.0 - throughput: 2267.573696145125 + inference_time: 416.0 + throughput: 2403.846153846154 estimated_peak_memory_range: min: 81920 - max: 1342440 + max: 1617688 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgjvokevg + job_id: j57yew4v5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:56:03Z' + timestamp: '2024-12-12T01:25:02Z' - torchscript_onnx_tflite: - inference_time: 2117.0 - throughput: 472.3665564478035 + inference_time: 2146.0 + throughput: 465.98322460391427 estimated_peak_memory_range: - min: 806912 - max: 9030592 + min: 802816 + max: 14104496 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jprvo120g + job_id: jgjvrmqxg job_status: Passed torchscript_onnx_qnn: - inference_time: 964.0 - throughput: 1037.344398340249 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: - min: 16384 - max: 5730256 + min: 12288 + max: 5986720 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jped84ko5 + job_id: jpxklj435 job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:56:05Z' + 
timestamp: '2024-12-12T01:25:05Z' - torchscript_onnx_tflite: - inference_time: 2051.0 - throughput: 487.56704046806436 + inference_time: 1978.0 + throughput: 505.5611729019211 estimated_peak_memory_range: - min: 1613824 - max: 14730000 + min: 1593344 + max: 15973808 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp2k439rp + job_id: jped71y15 job_status: Passed torchscript_onnx_qnn: - inference_time: 582.0 - throughput: 1718.213058419244 + inference_time: 594.0 + throughput: 1683.5016835016836 estimated_peak_memory_range: - min: 49152 - max: 12779744 + min: 65536 + max: 12966560 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jgz38vro5 + job_id: j5mn02mdp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:56:08Z' + timestamp: '2024-12-12T01:25:08Z' - torchscript_onnx_qnn: - inference_time: 502.0 - throughput: 1992.03187250996 + inference_time: 507.0 + throughput: 1972.3865877712033 estimated_peak_memory_range: - min: 192512 - max: 192512 + min: 131072 + max: 131072 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j56yrqz6p + job_id: j5welv735 job_status: Passed torchscript_onnx: - inference_time: 6063.0 - throughput: 164.93485073396008 + inference_time: 5915.0 + throughput: 169.06170752324599 estimated_peak_memory_range: - min: 15601664 - max: 15601664 + min: 15540224 + max: 15540224 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 14 total_layers: 75 - job_id: jgdx87orp + job_id: jp0zmqj95 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +576,4 @@ models: 
os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:56:19Z' + timestamp: '2024-12-12T01:25:19Z' diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 142cb4b0..75fe9fe6 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: QuickSRNetSmall universal_assets: - torchscript_onnx_tflite: mno354gkn - torchscript_onnx: mnzvw0xpq + torchscript_onnx_tflite: mn4l19y7q + torchscript_onnx: mn1wz7gpm performance_metrics: - torchscript_onnx_tflite: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1345.0 + throughput: 743.4944237918215 estimated_peak_memory_range: - min: 16384 - max: 4149056 + min: 32768 + max: 5188152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jpy1qr40p + job_id: jp2krz4rp job_status: Passed torchscript_onnx_qnn: - inference_time: 1008.0 - throughput: 992.063492063492 + inference_time: 1002.0 + throughput: 998.003992015968 estimated_peak_memory_range: - min: 212992 - max: 27569144 + min: 225280 + max: 13481496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgjvozk1g + job_id: jgjvr8wvg job_status: Passed torchscript_onnx: - inference_time: 1445.0 - throughput: 692.0415224913495 + inference_time: 1432.0 + throughput: 698.3240223463687 estimated_peak_memory_range: - min: 16384 - max: 1919152 + min: 12288 + max: 11206312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jgn6olrj5 + job_id: jgn6zw7k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:42:13Z' + timestamp: '2024-12-11T23:07:34Z' - torchscript_onnx_tflite: - inference_time: 880.0 - throughput: 1136.3636363636363 + inference_time: 810.0 + throughput: 1234.567901234568 estimated_peak_memory_range: min: 20480 - max: 11182896 + max: 11981216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jp0zd3e05 + job_id: jpy1oyq8p job_status: Passed torchscript_onnx_qnn: - inference_time: 633.0 - throughput: 1579.778830963665 + inference_time: 630.0 + throughput: 1587.3015873015872 estimated_peak_memory_range: min: 0 - max: 10049728 + max: 9840288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jped8e485 + job_id: jped7nlo5 job_status: Passed torchscript_onnx: - inference_time: 978.0 - throughput: 1022.4948875255624 + inference_time: 939.0 + throughput: 1064.9627263045793 estimated_peak_memory_range: min: 0 - max: 22030128 + max: 22251440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jprvo81kg + job_id: jprvl7n0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:42:13Z' + timestamp: '2024-12-11T23:07:35Z' - torchscript_onnx_tflite: - inference_time: 982.0 - throughput: 1018.3299389002036 + inference_time: 1015.0 + throughput: 985.2216748768473 estimated_peak_memory_range: - min: 16384 - max: 8527632 + min: 12288 + max: 8333312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jp8q60wqp + job_id: jp0zmxd95 job_status: Passed torchscript_onnx_qnn: - inference_time: 580.0 - throughput: 
1724.1379310344828 + inference_time: 669.0 + throughput: 1494.7683109118086 estimated_peak_memory_range: min: 0 - max: 8856640 + max: 8305376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgz38ov45 + job_id: jgz3l04o5 job_status: Passed torchscript_onnx: - inference_time: 977.0 - throughput: 1023.5414534288639 + inference_time: 973.0 + throughput: 1027.749229188078 estimated_peak_memory_range: min: 0 - max: 14806176 + max: 15285376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jp2k4036p + job_id: jp2krzvrp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,28 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:42:14Z' - - torchscript_onnx_tflite: - inference_time: 1340.0 - throughput: 746.2686567164179 - estimated_peak_memory_range: - min: 24576 - max: 2231088 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 8 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 11 - job_id: jgkeo7rvg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 842.0 - throughput: 1187.648456057007 + timestamp: '2024-12-11T23:07:36Z' + - torchscript_onnx_qnn: + inference_time: 872.0 + throughput: 1146.788990825688 estimated_peak_memory_range: - min: 0 - max: 1231248 + min: 225280 + max: 2019896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +219,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5we82m45 + job_id: j5welr135 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +228,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:42:04Z' + timestamp: '2024-12-11T23:07:24Z' - torchscript_onnx_tflite: - inference_time: 10099.0 - throughput: 
99.01970492127934 + inference_time: 9886.0 + throughput: 101.15314586283634 estimated_peak_memory_range: - min: 6307840 - max: 15875056 + min: 6316032 + max: 18263984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +242,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j5q6ze9ep + job_id: jgke2kowg job_status: Passed torchscript_onnx_qnn: - inference_time: 8869.0 - throughput: 112.75228323373548 + inference_time: 8731.0 + throughput: 114.53441759248655 estimated_peak_memory_range: - min: 196608 - max: 6013024 + min: 233472 + max: 10786544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp147yqnp + job_id: jp14nmv8p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +266,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:42:06Z' + timestamp: '2024-12-11T23:07:27Z' - torchscript_onnx_tflite: - inference_time: 1357.0 - throughput: 736.9196757553427 + inference_time: 1335.0 + throughput: 749.0636704119851 estimated_peak_memory_range: - min: 16384 - max: 3259856 + min: 3162112 + max: 5555768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jglvo6e25 + job_id: j5q6ldznp job_status: Passed torchscript_onnx_qnn: - inference_time: 857.0 - throughput: 1166.8611435239206 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 229376 - max: 1849096 + min: 221184 + max: 1733136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgdx8e76p + job_id: jgdxdmzrp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:42:07Z' + 
timestamp: '2024-12-11T23:07:28Z' - torchscript_onnx_tflite: - inference_time: 2006.0 - throughput: 498.5044865403789 + inference_time: 2311.0 + throughput: 432.7131112072696 estimated_peak_memory_range: - min: 20480 - max: 7822512 + min: 16384 + max: 7412112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +318,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j56yreqnp + job_id: jglvyqoj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1830.0 - throughput: 546.448087431694 + inference_time: 1641.0 + throughput: 609.3845216331505 estimated_peak_memory_range: min: 0 - max: 6010784 + max: 6075488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +333,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j57yk0vn5 + job_id: j57ye87v5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +342,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:42:08Z' + timestamp: '2024-12-11T23:07:29Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1471.0 + throughput: 679.8096532970768 estimated_peak_memory_range: - min: 16384 - max: 4674048 + min: 6307840 + max: 71007200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +356,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jp3jxvqmg + job_id: j56y80r6p job_status: Passed torchscript_onnx_qnn: - inference_time: 872.0 - throughput: 1146.788990825688 + inference_time: 855.0 + throughput: 1169.5906432748538 estimated_peak_memory_range: - min: 229376 - max: 1528120 + min: 233472 + max: 1425240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +371,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp4lmkj25 + job_id: jp4ly2985 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +380,13 @@ models: os_name: Android manufacturer: Qualcomm 
chipset: SA8650P Proxy - timestamp: '2024-11-26T01:42:09Z' + timestamp: '2024-12-11T23:07:30Z' - torchscript_onnx_tflite: - inference_time: 2384.0 - throughput: 419.46308724832215 + inference_time: 2345.0 + throughput: 426.43923240938165 estimated_peak_memory_range: - min: 1110016 - max: 8544112 + min: 16384 + max: 6950768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +394,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jgo2oke1p + job_id: jp3jzrx3g job_status: Passed torchscript_onnx_qnn: - inference_time: 1682.0 - throughput: 594.5303210463734 + inference_time: 1618.0 + throughput: 618.0469715698393 estimated_peak_memory_range: - min: 0 - max: 5890512 + min: 212992 + max: 6197808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +409,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpxk3ne85 + job_id: jpxklzd35 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +418,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:42:10Z' + timestamp: '2024-12-11T23:07:32Z' - torchscript_onnx_tflite: - inference_time: 2059.0 - throughput: 485.67265662943174 + inference_time: 2050.0 + throughput: 487.8048780487805 estimated_peak_memory_range: - min: 20480 - max: 12374624 + min: 6307840 + max: 19120832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +432,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jpv6e0zz5 + job_id: jpv6ln9k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1107.0 - throughput: 903.342366757001 + inference_time: 1132.0 + throughput: 883.3922261484099 estimated_peak_memory_range: min: 208896 - max: 13782224 + max: 13217504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +447,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5mnoqv7p + job_id: j5mn0lddp job_status: Passed reference_device_info: name: QCS8450 (Proxy) 
@@ -471,10 +456,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:42:12Z' + timestamp: '2024-12-11T23:07:33Z' - torchscript_onnx_qnn: - inference_time: 950.0 - throughput: 1052.6315789473683 + inference_time: 939.0 + throughput: 1064.9627263045793 estimated_peak_memory_range: min: 204800 max: 204800 @@ -485,14 +470,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jg9lkj9mg + job_id: jg9lzqxwg job_status: Passed torchscript_onnx: - inference_time: 1522.0 - throughput: 657.030223390276 + inference_time: 1423.0 + throughput: 702.7406886858749 estimated_peak_memory_range: - min: 8941568 - max: 8941568 + min: 8962048 + max: 8962048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +485,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jpy1qrv0p + job_id: jpy1oy78p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +494,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:42:15Z' + timestamp: '2024-12-11T23:07:37Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml index 0891b402..2c794339 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -9,6 +9,7 @@ description: QuickSRNet Small is designed for upscaling images on mobile platfor use_case: Super Resolution tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index 133baff4..46800f69 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ 
b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: QuickSRNetSmall-Quantized universal_assets: - torchscript_onnx_tflite: mqy3w6v9m - torchscript_onnx: mnzv4ropq + torchscript_onnx_tflite: mnw8ew1rn + torchscript_onnx: mqp3z2rom performance_metrics: - torchscript_onnx_tflite: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1078.0 + throughput: 927.643784786642 estimated_peak_memory_range: min: 24576 - max: 2477376 + max: 57020752 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp0zde465 + job_id: jg9lz1xlg job_status: Passed torchscript_onnx_qnn: - inference_time: 467.0 - throughput: 2141.3276231263385 + inference_time: 466.0 + throughput: 2145.922746781116 estimated_peak_memory_range: - min: 86016 - max: 2091744 + min: 69632 + max: 2125232 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp147qy2p + job_id: jgke2n92g job_status: Passed torchscript_onnx: - inference_time: 3516.0 - throughput: 284.4141069397042 + inference_time: 3627.0 + throughput: 275.70995312930796 estimated_peak_memory_range: - min: 14299136 - max: 16167656 + min: 11874304 + max: 13663432 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 45 - job_id: jp0zde395 + job_id: jgdxd92ep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:54:30Z' + timestamp: '2024-12-12T01:23:29Z' - torchscript_onnx_tflite: - inference_time: 885.0 - throughput: 1129.9435028248588 + inference_time: 878.0 + throughput: 1138.9521640091116 estimated_peak_memory_range: min: 16384 - max: 10307760 + max: 11636032 primary_compute_unit: NPU 
precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp8q6w2xp + job_id: jp14nlv2p job_status: Passed torchscript_onnx_qnn: - inference_time: 314.0 - throughput: 3184.7133757961783 + inference_time: 318.0 + throughput: 3144.6540880503144 estimated_peak_memory_range: - min: 65536 - max: 10987072 + min: 73728 + max: 10692640 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgdx87eep + job_id: j5q6lkm4p job_status: Passed torchscript_onnx: - inference_time: 2807.0 - throughput: 356.2522265764161 + inference_time: 2780.0 + throughput: 359.71223021582733 estimated_peak_memory_range: min: 0 - max: 29638480 + max: 30158816 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 45 - job_id: jp8q6w0kp + job_id: j57yew9l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:54:33Z' + timestamp: '2024-12-12T01:23:32Z' - torchscript_onnx_tflite: - inference_time: 1327.0 - throughput: 753.5795026375282 + inference_time: 1138.0 + throughput: 878.7346221441124 estimated_peak_memory_range: min: 12288 - max: 9129056 + max: 8817696 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jgkeorv2g + job_id: jgdxd9zep job_status: Passed torchscript_onnx_qnn: - inference_time: 343.0 - throughput: 2915.451895043732 + inference_time: 337.0 + throughput: 2967.359050445104 estimated_peak_memory_range: min: 61440 - max: 9671440 + max: 9152016 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jg9lk9jwg + job_id: j56y8jd0p job_status: Passed 
torchscript_onnx: - inference_time: 2231.0 - throughput: 448.22949350067233 + inference_time: 2610.0 + throughput: 383.1417624521073 estimated_peak_memory_range: min: 0 - max: 18232864 + max: 18455632 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 45 - job_id: jgkeor7wg + job_id: jp4lyo3v5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:54:36Z' + timestamp: '2024-12-12T01:23:34Z' - torchscript_onnx_tflite: - inference_time: 3249.0 - throughput: 307.7870113881194 + inference_time: 2296.0 + throughput: 435.54006968641113 estimated_peak_memory_range: min: 12288 - max: 9568704 + max: 14901248 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j5q6z904p + job_id: j57yew7l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 957.0 + throughput: 1044.932079414838 estimated_peak_memory_range: - min: 12288 - max: 7924224 + min: 16384 + max: 6930416 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp147qy8p + job_id: jp3jz3wlg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:54:05Z' + timestamp: '2024-12-12T01:23:03Z' - torchscript_onnx_tflite: - inference_time: 10691.0 - throughput: 93.53661958656814 + inference_time: 10353.0 + throughput: 96.59036028204385 estimated_peak_memory_range: - min: 1671168 - max: 3953056 + min: 12288 + max: 1977416 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 
total_layers: 13 - job_id: jglvoe485 + job_id: jp4lyo9v5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:53:35Z' + timestamp: '2024-12-12T01:22:32Z' - torchscript_onnx_tflite: - inference_time: 1083.0 - throughput: 923.3610341643582 + inference_time: 1081.0 + throughput: 925.0693802035153 estimated_peak_memory_range: min: 24576 - max: 1961952 + max: 2219696 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j56yrq20p + job_id: jpxkljd15 job_status: Passed torchscript_onnx_qnn: - inference_time: 386.0 - throughput: 2590.6735751295337 + inference_time: 392.0 + throughput: 2551.0204081632655 estimated_peak_memory_range: - min: 0 - max: 1652016 + min: 106496 + max: 1355456 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgdx87erp + job_id: jgo2l04xp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:54:08Z' + timestamp: '2024-12-12T01:23:06Z' - torchscript_onnx_tflite: - inference_time: 3889.0 - throughput: 257.1355104139882 + inference_time: 3980.0 + throughput: 251.25628140703517 estimated_peak_memory_range: min: 1589248 - max: 11076448 + max: 13906960 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jp3jxqnlg + job_id: j5mn02dwp job_status: Passed torchscript_onnx_qnn: - inference_time: 1927.0 - throughput: 518.9413596263622 + inference_time: 2033.0 + throughput: 491.88391539596654 estimated_peak_memory_range: - min: 24576 - max: 5764400 + min: 12288 + max: 10561568 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jp4lmjk85 + job_id: jgjvrmwxg job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:54:13Z' + timestamp: '2024-12-12T01:23:12Z' - torchscript_onnx_tflite: - inference_time: 1073.0 - throughput: 931.9664492078285 + inference_time: 1066.0 + throughput: 938.0863039399625 estimated_peak_memory_range: - min: 16384 - max: 12877232 + min: 28672 + max: 3303296 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jgo2oezxp + job_id: jgn6zy7r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 402.0 - throughput: 2487.5621890547263 + inference_time: 432.0 + throughput: 2314.814814814815 estimated_peak_memory_range: - min: 81920 - max: 1447968 + min: 73728 + max: 1379744 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpxk3en35 + job_id: jped71l15 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:54:16Z' + timestamp: '2024-12-12T01:23:15Z' - torchscript_onnx_tflite: - inference_time: 1806.0 - throughput: 553.7098560354374 + inference_time: 1851.0 + throughput: 540.2485143165857 estimated_peak_memory_range: - min: 16384 - max: 7386032 + min: 802816 + max: 8392208 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jgjvokdxg + job_id: jprvlqn9g job_status: Passed torchscript_onnx_qnn: - inference_time: 893.0 - throughput: 1119.8208286674133 + inference_time: 981.0 + throughput: 1019.367991845056 estimated_peak_memory_range: min: 12288 - max: 5874336 + max: 
5670192 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j5mnovqdp + job_id: jgz3l94k5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:54:19Z' + timestamp: '2024-12-12T01:23:18Z' - torchscript_onnx_tflite: - inference_time: 1077.0 - throughput: 928.5051067780872 + inference_time: 1084.0 + throughput: 922.509225092251 estimated_peak_memory_range: - min: 20480 - max: 13541336 + min: 16384 + max: 66778392 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jped84o15 + job_id: jp2kr6v4p job_status: Passed torchscript_onnx_qnn: - inference_time: 400.0 - throughput: 2500.0 + inference_time: 390.0 + throughput: 2564.102564102564 estimated_peak_memory_range: - min: 73728 - max: 1669680 + min: 77824 + max: 1322840 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jgn6orlk5 + job_id: j5welv465 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:54:22Z' + timestamp: '2024-12-12T01:23:20Z' - torchscript_onnx_tflite: - inference_time: 2065.0 - throughput: 484.26150121065376 + inference_time: 2068.0 + throughput: 483.55899419729207 estimated_peak_memory_range: - min: 401408 - max: 7726096 + min: 806912 + max: 7966384 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: j5we8m265 + job_id: jp0zmqv65 job_status: Passed torchscript_onnx_qnn: - inference_time: 890.0 - throughput: 1123.5955056179776 + inference_time: 907.0 + throughput: 1102.5358324145534 
estimated_peak_memory_range: min: 12288 - max: 5734544 + max: 5977056 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jprvo180g + job_id: jg9lz1dlg job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:54:25Z' + timestamp: '2024-12-12T01:23:23Z' - torchscript_onnx_tflite: - inference_time: 1312.0 - throughput: 762.1951219512196 + inference_time: 1815.0 + throughput: 550.9641873278237 estimated_peak_memory_range: - min: 20480 - max: 10200240 + min: 1589248 + max: 13069440 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 13 - job_id: jg9lk9jlg + job_id: jp8qe94xp job_status: Passed torchscript_onnx_qnn: inference_time: 539.0 throughput: 1855.287569573284 estimated_peak_memory_range: min: 0 - max: 11071024 + max: 12372640 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jpy1qvr8p + job_id: jp14nl62p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:54:27Z' + timestamp: '2024-12-12T01:23:26Z' - torchscript_onnx_qnn: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: - min: 118784 - max: 118784 + min: 192512 + max: 192512 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,7 +552,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j57ykv0v5 + job_id: jpv6lo9j5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -561,4 +561,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - 
timestamp: '2024-11-26T03:54:39Z' + timestamp: '2024-12-12T01:23:37Z' diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 0dfd636b..52f2b747 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Real-ESRGAN-General-x4v3 universal_assets: - torchscript_onnx_tflite: mm6v8jj5q - torchscript_onnx: mq3e23xkm + torchscript_onnx_tflite: mq9lp00lq + torchscript_onnx: mq21425jm performance_metrics: - torchscript_onnx_tflite: - inference_time: 7374.0 - throughput: 135.61160835367508 + inference_time: 7538.0 + throughput: 132.66118333775538 estimated_peak_memory_range: - min: 16384 - max: 10204376 + min: 9478144 + max: 20425240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgdx8ev6p + job_id: j57ye86v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7042.0 - throughput: 142.00511218403864 + inference_time: 6994.0 + throughput: 142.9796968830426 estimated_peak_memory_range: - min: 20480 - max: 16263752 + min: 24576 + max: 11148584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j5q6ze7ep + job_id: jgke2kdwg job_status: Passed torchscript_onnx: - inference_time: 6918.0 - throughput: 144.55044810638913 + inference_time: 7022.0 + throughput: 142.40956992309884 estimated_peak_memory_range: - min: 6692864 - max: 8119176 + min: 12288 + max: 32892856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jp147ydnp + job_id: jg9lzqkwg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 
- timestamp: '2024-11-26T01:41:28Z' + timestamp: '2024-12-11T23:06:49Z' - torchscript_onnx_tflite: inference_time: 5356.0 throughput: 186.70649738610905 estimated_peak_memory_range: - min: 9453568 - max: 30714912 + min: 9469952 + max: 37657232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j57yk0dn5 + job_id: jp4ly2885 job_status: Passed torchscript_onnx_qnn: - inference_time: 4918.0 - throughput: 203.334688897926 + inference_time: 4917.0 + throughput: 203.3760423022168 estimated_peak_memory_range: - min: 0 - max: 21088496 + min: 208896 + max: 28251600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jglvo6025 + job_id: j5q6ldwnp job_status: Passed torchscript_onnx: - inference_time: 4990.0 - throughput: 200.40080160320642 + inference_time: 4985.0 + throughput: 200.60180541624874 estimated_peak_memory_range: - min: 6529024 - max: 81693600 + min: 3620864 + max: 78812528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jgdx8er6p + job_id: jp14nm78p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:41:30Z' + timestamp: '2024-12-11T23:06:51Z' - torchscript_onnx_tflite: - inference_time: 5184.0 - throughput: 192.90123456790124 + inference_time: 4435.0 + throughput: 225.4791431792559 estimated_peak_memory_range: - min: 9469952 - max: 29626560 + min: 131072 + max: 23899120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jp4lmkw25 + job_id: jpxklzm35 job_status: Passed torchscript_onnx_qnn: - inference_time: 4682.0 - throughput: 213.58393848782572 + 
inference_time: 4681.0 + throughput: 213.62956633198036 estimated_peak_memory_range: min: 208896 - max: 19460080 + max: 25191952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j56yre3np + job_id: jglvyq7j5 job_status: Passed torchscript_onnx: - inference_time: 3902.0 - throughput: 256.278831368529 + inference_time: 4692.0 + throughput: 213.12872975277068 estimated_peak_memory_range: - min: 0 - max: 30771328 + min: 6938624 + max: 37586784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: j57yk0jn5 + job_id: jgdxdm8rp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:41:31Z' + timestamp: '2024-12-11T23:06:52Z' - torchscript_onnx_tflite: - inference_time: 7270.0 - throughput: 137.5515818431912 + inference_time: 7387.0 + throughput: 135.37295248409367 estimated_peak_memory_range: - min: 9482240 - max: 19922864 + min: 9457664 + max: 14679576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j5mnoqz7p + job_id: j5mn0l4dp job_status: Passed torchscript_onnx_qnn: - inference_time: 6192.0 - throughput: 161.49870801033592 + inference_time: 6237.0 + throughput: 160.333493666827 estimated_peak_memory_range: - min: 249856 - max: 1561408 + min: 286720 + max: 1609584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jp3jxv4mg + job_id: j56y80v6p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:41:19Z' + timestamp: 
'2024-12-11T23:06:40Z' - torchscript_onnx_tflite: - inference_time: 243988.0 - throughput: 4.09856222437169 + inference_time: 243519.0 + throughput: 4.106455759098879 estimated_peak_memory_range: - min: 9457664 - max: 29144912 + min: 8499200 + max: 30986928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgn6olej5 + job_id: jgn6zwxk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 237989.0 - throughput: 4.201874876569925 + inference_time: 238013.0 + throughput: 4.201451181237999 estimated_peak_memory_range: - min: 438272 - max: 6138848 + min: 282624 + max: 10975296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jpv6e01z5 + job_id: jgo2l9mqp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:41:21Z' + timestamp: '2024-12-11T23:06:43Z' - torchscript_onnx_tflite: - inference_time: 7358.0 - throughput: 135.9064963305246 + inference_time: 7446.0 + throughput: 134.30029546065 estimated_peak_memory_range: - min: 9482240 - max: 20494472 + min: 9474048 + max: 20194728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jp2k40m6p + job_id: jprvl790g job_status: Passed torchscript_onnx_qnn: - inference_time: 6212.0 - throughput: 160.97875080489376 + inference_time: 6238.0 + throughput: 160.3077909586406 estimated_peak_memory_range: - min: 270336 - max: 1562488 + min: 258048 + max: 1905608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jgjvoz01g + job_id: jpv6lnek5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:41:22Z' + timestamp: '2024-12-11T23:06:44Z' - torchscript_onnx_tflite: - inference_time: 12272.0 - throughput: 81.48631029986961 + inference_time: 12287.0 + throughput: 81.3868316106454 estimated_peak_memory_range: min: 9482240 - max: 30564496 + max: 30907280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jpy1qrd0p + job_id: jp2krzjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 11334.0 - throughput: 88.23010411152285 + inference_time: 10825.0 + throughput: 92.37875288683603 estimated_peak_memory_range: - min: 253952 - max: 6354768 + min: 245760 + max: 6292432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jped8er85 + job_id: jgjvr8ovg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:41:24Z' + timestamp: '2024-12-11T23:06:45Z' - torchscript_onnx_tflite: - inference_time: 7493.0 - throughput: 133.45789403443214 + inference_time: 7377.0 + throughput: 135.556459265284 estimated_peak_memory_range: - min: 9469952 - max: 19603704 + min: 9461760 + max: 20382728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jp0zd3105 + job_id: jpy1oyn8p job_status: Passed torchscript_onnx_qnn: - inference_time: 6227.0 - throughput: 160.59097478721696 + inference_time: 6234.0 + throughput: 160.41065126724413 estimated_peak_memory_range: - min: 249856 - max: 1836560 + min: 282624 + max: 1509048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jgz38ox45 + job_id: jped7n8o5 job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:41:25Z' + timestamp: '2024-12-11T23:06:46Z' - torchscript_onnx_tflite: - inference_time: 13904.0 - throughput: 71.92174913693901 + inference_time: 14010.0 + throughput: 71.3775874375446 estimated_peak_memory_range: - min: 9469952 - max: 29168128 + min: 9474048 + max: 33172640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jp8q603qp + job_id: jp0zmxk95 job_status: Passed torchscript_onnx_qnn: - inference_time: 12075.0 - throughput: 82.81573498964804 + inference_time: 12122.0 + throughput: 82.49463784853984 estimated_peak_memory_range: - min: 217088 - max: 6012368 + min: 249856 + max: 5997808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j5we82d45 + job_id: jgz3l08o5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:41:26Z' + timestamp: '2024-12-11T23:06:47Z' - torchscript_onnx_tflite: - inference_time: 12394.0 - throughput: 80.68420203324189 + inference_time: 11406.0 + throughput: 87.67315448009819 estimated_peak_memory_range: - min: 9478144 - max: 38500208 + min: 9457664 + max: 41647840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jgkeo7lvg + job_id: jp8qek8kp job_status: Passed torchscript_onnx_qnn: - inference_time: 9843.0 - throughput: 101.5950421619425 + inference_time: 9737.0 + throughput: 102.70103728047653 estimated_peak_memory_range: - min: 208896 - max: 27322336 + min: 225280 + max: 32321376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jg9lkj3mg + job_id: j5welr835 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:41:27Z' + timestamp: '2024-12-11T23:06:48Z' - torchscript_onnx_qnn: - inference_time: 6752.0 - throughput: 148.1042654028436 + inference_time: 6710.0 + throughput: 149.03129657228018 estimated_peak_memory_range: min: 221184 max: 221184 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jgo2ok11p + job_id: jp3jzr83g job_status: Passed torchscript_onnx: - inference_time: 7792.0 - throughput: 128.33675564681724 + inference_time: 7825.0 + throughput: 127.79552715654953 estimated_peak_memory_range: - min: 8945664 - max: 8945664 + min: 8863744 + max: 8863744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jp4lmkx25 + job_id: j57ye8kv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:41:32Z' + timestamp: '2024-12-11T23:06:54Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index ef094b6a..3b48676c 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Real-ESRGAN-x4plus universal_assets: - torchscript_onnx_tflite: mn41r4lvn - torchscript_onnx: mqpz0k4jn + torchscript_onnx_tflite: mm5ed8g6m + torchscript_onnx: mnw8e76kn performance_metrics: - torchscript_onnx_tflite: - inference_time: 70240.0 - throughput: 14.236902050113896 + inference_time: 68711.0 + throughput: 14.55371046848394 estimated_peak_memory_range: - min: 135168 - max: 119279840 + 
min: 110592 + max: 37122312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jped8ekv5 + job_id: jg9lzq9lg job_status: Passed torchscript_onnx_qnn: - inference_time: 67480.0 - throughput: 14.819205690574986 + inference_time: 68101.0 + throughput: 14.68407218689887 estimated_peak_memory_range: - min: 61440 - max: 38056080 + min: 139264 + max: 42668032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jprvo8e7g + job_id: j5mn0lvdp job_status: Passed torchscript_onnx: - inference_time: 65834.0 - throughput: 15.189719597776225 + inference_time: 67527.0 + throughput: 14.80889125831149 estimated_peak_memory_range: - min: 180224 - max: 44458232 + min: 106496 + max: 481930152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jgjvozleg + job_id: jgo2l9eqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:40:42Z' + timestamp: '2024-12-11T23:06:05Z' - torchscript_onnx_tflite: - inference_time: 51620.0 - throughput: 19.37233630375823 + inference_time: 51377.0 + throughput: 19.46396247348035 estimated_peak_memory_range: - min: 205008896 - max: 319021824 + min: 1257472 + max: 118126528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgz38orx5 + job_id: jp14nmq2p job_status: Passed torchscript_onnx_qnn: - inference_time: 50354.0 - throughput: 19.859395480001588 + inference_time: 50277.0 + throughput: 19.88981045010641 estimated_peak_memory_range: - min: 57344 - max: 112598064 + min: 69632 + max: 111354384 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jpy1qr6lp + job_id: jgn6zwrk5 job_status: Passed torchscript_onnx: - inference_time: 53489.0 - throughput: 18.695432705789976 + inference_time: 49882.0 + throughput: 20.047311655506995 estimated_peak_memory_range: - min: 7917568 - max: 733741296 + min: 8617984 + max: 734619088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jped8evv5 + job_id: jpv6ln4k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:40:43Z' + timestamp: '2024-12-11T23:06:06Z' - torchscript_onnx_tflite: - inference_time: 43169.0 - throughput: 23.164771016238504 + inference_time: 43029.0 + throughput: 23.240140370447836 estimated_peak_memory_range: min: 3158016 - max: 141235888 + max: 146984176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j5we82qm5 + job_id: jgdxdm7ep job_status: Passed torchscript_onnx_qnn: - inference_time: 38318.0 - throughput: 26.0973954799311 + inference_time: 42991.0 + throughput: 23.260682468423624 estimated_peak_memory_range: min: 0 - max: 138200688 + max: 137413088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jp0zd3ln5 + job_id: jprvl710g job_status: Passed torchscript_onnx: - inference_time: 42840.0 - throughput: 23.34267040149393 + inference_time: 38265.0 + throughput: 26.133542401672546 estimated_peak_memory_range: min: 0 - max: 186049472 + max: 186080848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jgz38o7x5 + job_id: jgjvr81vg job_status: Passed 
reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:40:45Z' + timestamp: '2024-12-11T23:06:07Z' - torchscript_onnx_tflite: - inference_time: 71114.0 - throughput: 14.061928734145175 + inference_time: 63251.0 + throughput: 15.810026718945155 estimated_peak_memory_range: - min: 73728 - max: 227238080 + min: 135168 + max: 225732128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jg9lkjw8g + job_id: j5welrm35 job_status: Passed torchscript_onnx_qnn: - inference_time: 62862.0 - throughput: 15.90786166523496 + inference_time: 63562.0 + throughput: 15.732670463484473 estimated_peak_memory_range: - min: 434176 - max: 1684464 + min: 389120 + max: 1665776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jp8q60zop + job_id: jp2krz3rp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:40:33Z' + timestamp: '2024-12-11T23:05:55Z' - torchscript_onnx_tflite: - inference_time: 3549899.0 - throughput: 0.28169815535596926 + inference_time: 3549947.0 + throughput: 0.28169434642263674 estimated_peak_memory_range: - min: 16384 - max: 140490256 + min: 0 + max: 140167104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jp147ye7p + job_id: jg9lzq9wg job_status: Passed torchscript_onnx_qnn: - inference_time: 3549353.0 - throughput: 0.28174148922352893 + inference_time: 3549384.0 + throughput: 0.28173902851875143 estimated_peak_memory_range: - min: 323584 - max: 5860896 + min: 618496 + max: 10882736 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j5q6ze3op + job_id: jp0zmxe95 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:40:35Z' + timestamp: '2024-12-11T23:05:58Z' - torchscript_onnx_tflite: - inference_time: 69977.0 - throughput: 14.290409706046272 + inference_time: 65163.0 + throughput: 15.346132007427528 estimated_peak_memory_range: - min: 3252224 - max: 45825272 + min: 106496 + max: 38491672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgdx8eozp + job_id: jp14nmq8p job_status: Passed torchscript_onnx_qnn: - inference_time: 67209.0 - throughput: 14.878959663140353 + inference_time: 70133.0 + throughput: 14.258622902200106 estimated_peak_memory_range: - min: 421888 - max: 1757416 + min: 167936 + max: 4877112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jglvo6km5 + job_id: jp8qekwkp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:40:36Z' + timestamp: '2024-12-11T23:05:59Z' - torchscript_onnx_tflite: - inference_time: 113248.0 - throughput: 8.83017801638881 + inference_time: 113277.0 + throughput: 8.82791740600475 estimated_peak_memory_range: - min: 3162112 - max: 131507808 + min: 3231744 + max: 133097744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: j57yk0x95 + job_id: jgdxdm7rp job_status: Passed torchscript_onnx_qnn: - inference_time: 113219.0 - throughput: 8.832439784841768 + inference_time: 113213.0 + throughput: 8.832907881603703 
estimated_peak_memory_range: - min: 405504 - max: 6539056 + min: 2854912 + max: 9020640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: j56yre1yp + job_id: jgke2krwg job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:40:37Z' + timestamp: '2024-12-11T23:06:00Z' - torchscript_onnx_tflite: - inference_time: 68292.0 - throughput: 14.643003572892871 + inference_time: 71498.0 + throughput: 13.986405214131864 estimated_peak_memory_range: - min: 122880 - max: 227071128 + min: 3264512 + max: 45515960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jp4lmkv15 + job_id: j57ye8vv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 63047.0 - throughput: 15.861182927022698 + inference_time: 63007.0 + throughput: 15.871252400526926 estimated_peak_memory_range: - min: 401408 - max: 2116320 + min: 98304 + max: 4805408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jp3jxvmng + job_id: j5q6ld9np job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:40:39Z' + timestamp: '2024-12-11T23:06:01Z' - torchscript_onnx_tflite: - inference_time: 132705.0 - throughput: 7.535511096040089 + inference_time: 132664.0 + throughput: 7.537839956582042 estimated_peak_memory_range: - min: 3272704 - max: 143094112 + min: 0 + max: 138579696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jpxk3nyl5 + job_id: jp4ly2j85 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 131262.0 - throughput: 7.618351084091359 + inference_time: 131264.0 + throughput: 7.618235007313506 estimated_peak_memory_range: - min: 0 - max: 5660048 + min: 471040 + max: 6625488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jgo2okvkp + job_id: jglvyqej5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:40:40Z' + timestamp: '2024-12-11T23:06:03Z' - torchscript_onnx_tflite: - inference_time: 133383.0 - throughput: 7.497207290284369 + inference_time: 131098.0 + throughput: 7.6278814322110176 estimated_peak_memory_range: - min: 3252224 - max: 111351520 + min: 3203072 + max: 109991200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jgn6ol3q5 + job_id: jpxklze35 job_status: Passed torchscript_onnx_qnn: - inference_time: 140758.0 - throughput: 7.1043919350942755 + inference_time: 148354.0 + throughput: 6.740633889210941 estimated_peak_memory_range: - min: 327680 - max: 89713888 + min: 212992 + max: 89660128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1029 - job_id: jpv6e0wr5 + job_id: jp3jzrq3g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:40:41Z' + timestamp: '2024-12-11T23:06:04Z' - torchscript_onnx_qnn: - inference_time: 65114.0 - throughput: 15.357680375956015 + inference_time: 65207.0 + throughput: 15.335776833775515 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 1029 - job_id: jgkeo73ng + job_id: jpy1oyv8p job_status: Passed torchscript_onnx: - inference_time: 65633.0 - throughput: 15.236237868145597 + inference_time: 65661.0 + throughput: 15.229740637516944 estimated_peak_memory_range: - min: 39731200 - max: 39731200 + min: 41213952 + max: 41213952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j5we829m5 + job_id: jped7n2o5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:40:46Z' + timestamp: '2024-12-11T23:06:08Z' diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index c5b511db..5e50f7a1 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: RegNet universal_assets: - torchscript_onnx_tflite: mno3549vn - torchscript_onnx: mng1p2jrn + torchscript_onnx_tflite: mqyv3r75q + torchscript_onnx: mnj4x51dn performance_metrics: - torchscript_onnx_tflite: - inference_time: 2054.0 - throughput: 486.8549172346641 + inference_time: 2060.0 + throughput: 485.43689320388347 estimated_peak_memory_range: min: 20480 - max: 117511544 + max: 128203216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jgkeo77ng + job_id: jp3jzrmlg job_status: Passed torchscript_onnx_qnn: - inference_time: 2113.0 - throughput: 473.260766682442 + inference_time: 2126.0 + throughput: 470.36688617121354 estimated_peak_memory_range: - min: 495616 - max: 83686944 + min: 638976 + max: 83555352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jg9lkjy8g + job_id: j57ye8jl5 
job_status: Passed torchscript_onnx: - inference_time: 2185.0 - throughput: 457.66590389016017 + inference_time: 2170.0 + throughput: 460.8294930875576 estimated_peak_memory_range: - min: 12288 - max: 45586440 + min: 446464 + max: 2157368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jp0zd32n5 + job_id: j5q6ld74p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:39:41Z' + timestamp: '2024-12-11T23:05:04Z' - torchscript_onnx_tflite: - inference_time: 1405.0 - throughput: 711.7437722419929 + inference_time: 1396.0 + throughput: 716.3323782234957 estimated_peak_memory_range: min: 16384 - max: 28648432 + max: 32775360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jglvo62m5 + job_id: jgo2l9vxp job_status: Passed torchscript_onnx_qnn: - inference_time: 1527.0 - throughput: 654.8788474132285 + inference_time: 1474.0 + throughput: 678.42605156038 estimated_peak_memory_range: - min: 0 - max: 28942144 + min: 618496 + max: 32906464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jp147yw7p + job_id: jp4ly2xv5 job_status: Passed torchscript_onnx: - inference_time: 1605.0 - throughput: 623.0529595015577 + inference_time: 1572.0 + throughput: 636.1323155216285 estimated_peak_memory_range: min: 0 - max: 152389696 + max: 154416672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jp8q60mop + job_id: jglvyq085 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 
8 Gen 3 - timestamp: '2024-11-26T01:39:42Z' + timestamp: '2024-12-11T23:05:05Z' - torchscript_onnx_tflite: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1387.0 + throughput: 720.9805335255949 estimated_peak_memory_range: min: 12288 - max: 29359216 + max: 33330672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: j56yrezyp + job_id: jpv6lnwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1233.0 - throughput: 811.0300081103001 + inference_time: 1479.0 + throughput: 676.132521974307 estimated_peak_memory_range: - min: 614400 - max: 29478960 + min: 0 + max: 31355264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jgdx8eqzp + job_id: jpxklz715 job_status: Passed torchscript_onnx: - inference_time: 1312.0 - throughput: 762.1951219512196 + inference_time: 1553.0 + throughput: 643.915003219575 estimated_peak_memory_range: min: 0 - max: 77870592 + max: 78487968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jgkeo7qng + job_id: j56y8030p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:39:43Z' + timestamp: '2024-12-11T23:05:06Z' - torchscript_onnx_tflite: - inference_time: 2023.0 - throughput: 494.3153732081068 + inference_time: 2035.0 + throughput: 491.4004914004914 estimated_peak_memory_range: - min: 16384 - max: 127707800 + min: 28672 + max: 117508240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jp3jxv1ng + job_id: jgjvr8lxg job_status: Passed torchscript_onnx_qnn: - inference_time: 2024.0 - 
throughput: 494.0711462450593 + inference_time: 2023.0 + throughput: 494.3153732081068 estimated_peak_memory_range: - min: 651264 - max: 1754816 + min: 626688 + max: 2344800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j57yk0l95 + job_id: j5mn0lwwp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:39:31Z' + timestamp: '2024-12-11T23:04:55Z' - torchscript_onnx_tflite: - inference_time: 69226.0 - throughput: 14.445439574726258 + inference_time: 69247.0 + throughput: 14.441058818432568 estimated_peak_memory_range: - min: 77824 - max: 27961936 + min: 32768 + max: 28648720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jgo2oknkp + job_id: jped7nv15 job_status: Passed torchscript_onnx_qnn: - inference_time: 69730.0 - throughput: 14.34102968593145 + inference_time: 69713.0 + throughput: 14.344526845781992 estimated_peak_memory_range: - min: 0 - max: 5908032 + min: 589824 + max: 11113680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jpxk3n6l5 + job_id: jprvl749g job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:39:34Z' + timestamp: '2024-12-11T23:04:57Z' - torchscript_onnx_tflite: - inference_time: 2076.0 - throughput: 481.6955684007707 + inference_time: 2026.0 + throughput: 493.58341559723596 estimated_peak_memory_range: - min: 20480 - max: 117268320 + min: 16384 + max: 106525288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: 
jpv6e0rr5 + job_id: jgz3l07k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2040.0 - throughput: 490.19607843137254 + inference_time: 2041.0 + throughput: 489.9559039686428 estimated_peak_memory_range: - min: 638976 - max: 1993824 + min: 634880 + max: 2156896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j5mnoq69p + job_id: jp2krz74p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:39:35Z' + timestamp: '2024-12-11T23:04:58Z' - torchscript_onnx_tflite: - inference_time: 3547.0 - throughput: 281.928390188892 + inference_time: 3551.0 + throughput: 281.61081385525205 estimated_peak_memory_range: min: 16384 - max: 23372016 + max: 26754544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jgjvoz2eg + job_id: j5welrd65 job_status: Passed torchscript_onnx_qnn: - inference_time: 3715.0 - throughput: 269.17900403768505 + inference_time: 3538.0 + throughput: 282.6455624646693 estimated_peak_memory_range: min: 618496 - max: 6574784 + max: 6662768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jgn6olmq5 + job_id: jpy1oy47p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:39:36Z' + timestamp: '2024-12-11T23:04:59Z' - torchscript_onnx_tflite: - inference_time: 2048.0 - throughput: 488.28125 + inference_time: 2044.0 + throughput: 489.23679060665364 estimated_peak_memory_range: - min: 16384 - max: 117344920 + min: 20480 + max: 106778480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jped8ewv5 + job_id: jg9lzq3lg job_status: Passed torchscript_onnx_qnn: - inference_time: 2017.0 - throughput: 495.785820525533 + inference_time: 2028.0 + throughput: 493.0966469428008 estimated_peak_memory_range: - min: 630784 - max: 1958016 + min: 634880 + max: 2141960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jprvo827g + job_id: jp0zmx165 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:39:37Z' + timestamp: '2024-12-11T23:05:01Z' - torchscript_onnx_tflite: - inference_time: 3959.0 - throughput: 252.5890376357666 + inference_time: 3942.0 + throughput: 253.67833587011668 estimated_peak_memory_range: min: 16384 - max: 27774896 + max: 29642576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jgz38ojx5 + job_id: jp14nmd2p job_status: Passed torchscript_onnx_qnn: - inference_time: 4120.0 - throughput: 242.71844660194174 + inference_time: 4135.0 + throughput: 241.8379685610641 estimated_peak_memory_range: - min: 393216 - max: 6277824 + min: 618496 + max: 6543440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jp2k409qp + job_id: jp8qek3xp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:39:38Z' + timestamp: '2024-12-11T23:05:02Z' - torchscript_onnx_tflite: - inference_time: 2835.0 - throughput: 352.7336860670194 + inference_time: 2808.0 + throughput: 356.1253561253561 estimated_peak_memory_range: min: 16384 - max: 25142576 + max: 26372608 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: j5we823m5 + job_id: jgdxdmrep job_status: Passed torchscript_onnx_qnn: - inference_time: 2950.0 - throughput: 338.9830508474576 + inference_time: 2959.0 + throughput: 337.95201081446436 estimated_peak_memory_range: min: 618496 - max: 25641856 + max: 25767936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jpy1qrjlp + job_id: jgke2kl2g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:39:40Z' + timestamp: '2024-12-11T23:05:03Z' - torchscript_onnx_qnn: - inference_time: 2226.0 - throughput: 449.23629829290206 + inference_time: 2213.0 + throughput: 451.8752824220515 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jp4lmkd15 + job_id: jgn6zw9r5 job_status: Passed torchscript_onnx: - inference_time: 2204.0 - throughput: 453.7205081669691 + inference_time: 2227.0 + throughput: 449.034575662326 estimated_peak_memory_range: - min: 41693184 - max: 41693184 + min: 42864640 + max: 42864640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: j5q6zerop + job_id: jp3jzr4lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:39:44Z' + timestamp: '2024-12-11T23:05:08Z' diff --git a/qai_hub_models/models/regnet_quantized/evaluate.py b/qai_hub_models/models/regnet_quantized/evaluate.py index 24b2be32..dfc98897 100644 --- a/qai_hub_models/models/regnet_quantized/evaluate.py +++ 
b/qai_hub_models/models/regnet_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/regnet_quantized/export.py b/qai_hub_models/models/regnet_quantized/export.py index ca3b6d10..b1e5941f 100644 --- a/qai_hub_models/models/regnet_quantized/export.py +++ b/qai_hub_models/models/regnet_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/regnet_quantized/info.yaml b/qai_hub_models/models/regnet_quantized/info.yaml index b922a3a7..a5e38ffc 100644 --- a/qai_hub_models/models/regnet_quantized/info.yaml +++ b/qai_hub_models/models/regnet_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2003.13678 research_paper_title: Designing Network Design Spaces license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/regnet_quantized/perf.yaml b/qai_hub_models/models/regnet_quantized/perf.yaml index 306ab646..d4be408f 100644 --- a/qai_hub_models/models/regnet_quantized/perf.yaml +++ b/qai_hub_models/models/regnet_quantized/perf.yaml @@ -47,14 +47,14 @@ aggregated: models: - name: RegNetQuantized universal_assets: - torchscript_onnx_tflite: mng1dwgrn + torchscript_onnx_tflite: mm5edow6m performance_metrics: - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 776.0 + throughput: 1288.659793814433 estimated_peak_memory_range: - min: 12288 - max: 320510448 + min: 20480 + max: 63706928 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: j57ykvyl5 + job_id: j56y8jv0p job_status: Passed torchscript_onnx_qnn: - inference_time: 1028.0 - throughput: 972.7626459143969 + inference_time: 1024.0 + throughput: 976.5625 estimated_peak_memory_range: min: 16384 - max: 62410536 + max: 62761536 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jglvoev85 + job_id: jpxklj315 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:52:56Z' + timestamp: '2024-12-12T01:21:34Z' - torchscript_onnx_tflite: - inference_time: 531.0 - throughput: 1883.2391713747645 + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 16384 - max: 30893536 + min: 0 + max: 36321792 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp4lmjlv5 + job_id: jp3jz38lg job_status: Passed torchscript_onnx_qnn: - inference_time: 740.0 - throughput: 1351.3513513513512 + inference_time: 754.0 + throughput: 1326.2599469496022 estimated_peak_memory_range: min: 163840 - max: 32586528 + max: 34382832 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j56yrqy0p + job_id: j5mn02owp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:52:58Z' + timestamp: '2024-12-12T01:21:36Z' - torchscript_onnx_tflite: - inference_time: 533.0 - throughput: 1876.172607879925 + inference_time: 532.0 + throughput: 1879.6992481203008 estimated_peak_memory_range: min: 8192 - max: 29715232 + 
max: 32793904 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jpxk3ek15 + job_id: jgo2l0mxp job_status: Passed torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 767.0 + throughput: 1303.7809647979138 estimated_peak_memory_range: min: 159744 - max: 32120304 + max: 32920944 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp3jxqjlg + job_id: jgn6zyor5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:53:00Z' + timestamp: '2024-12-12T01:21:38Z' - torchscript_onnx_tflite: - inference_time: 2378.0 - throughput: 420.52144659377626 + inference_time: 2437.0 + throughput: 410.3405826836274 estimated_peak_memory_range: min: 12288 - max: 33287952 + max: 36212496 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: j5mnovnwp + job_id: jpv6lo4j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4021.0 - throughput: 248.69435463814972 + inference_time: 4196.0 + throughput: 238.32221163012392 estimated_peak_memory_range: - min: 36864 - max: 8063280 + min: 163840 + max: 11686800 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgo2oe2xp + job_id: jprvlqo9g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:52:39Z' + timestamp: '2024-12-12T01:21:40Z' - torchscript_onnx_tflite: - inference_time: 44075.0 - throughput: 22.688598979013047 + inference_time: 42143.0 + throughput: 
23.728733122938568 estimated_peak_memory_range: - min: 122880 - max: 63365448 + min: 192512 + max: 3900680 primary_compute_unit: GPU precision: int8 layer_info: @@ -214,7 +214,7 @@ models: layers_on_gpu: 91 layers_on_cpu: 13 total_layers: 116 - job_id: jgn6or6r5 + job_id: jgjvrm1xg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -223,13 +223,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:52:18Z' + timestamp: '2024-12-12T01:21:20Z' - torchscript_onnx_tflite: - inference_time: 766.0 - throughput: 1305.4830287206266 + inference_time: 772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: - min: 16384 - max: 62412560 + min: 53248 + max: 64383968 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,14 +237,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jprvo1v9g + job_id: jgz3l9wk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 957.0 - throughput: 1044.932079414838 + inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: - min: 184320 - max: 1374016 + min: 180224 + max: 1475160 primary_compute_unit: NPU precision: int8 layer_info: @@ -252,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jpv6ez6j5 + job_id: jp2kr644p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -261,13 +261,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:52:41Z' + timestamp: '2024-12-12T01:21:41Z' - torchscript_onnx_tflite: - inference_time: 9421.0 - throughput: 106.14584439019212 + inference_time: 9402.0 + throughput: 106.36034886194426 estimated_peak_memory_range: min: 16384 - max: 29445136 + max: 30570608 primary_compute_unit: NPU precision: int8 layer_info: @@ -275,14 +275,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp2k43k4p + job_id: j5welv865 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 10225.0 - throughput: 97.79951100244499 + inference_time: 10222.0 + throughput: 97.82821365681863 estimated_peak_memory_range: - min: 98304 - max: 5915008 + min: 94208 + max: 10638800 primary_compute_unit: NPU precision: int8 layer_info: @@ -290,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jped84d15 + job_id: jp0zmqd65 job_status: Passed reference_device_info: name: SA7255P ADP @@ -299,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:52:44Z' + timestamp: '2024-12-12T01:21:45Z' - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 768.0 + throughput: 1302.0833333333333 estimated_peak_memory_range: - min: 16384 - max: 14071080 + min: 20480 + max: 62032744 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,14 +313,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jpy1qv17p + job_id: jg9lz1klg job_status: Passed torchscript_onnx_qnn: - inference_time: 981.0 - throughput: 1019.367991845056 + inference_time: 965.0 + throughput: 1036.2694300518135 estimated_peak_memory_range: - min: 172032 - max: 1637040 + min: 176128 + max: 1490056 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +328,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgz38v3k5 + job_id: jp8qe96xp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -337,13 +337,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:52:46Z' + timestamp: '2024-12-12T01:21:47Z' - torchscript_onnx_tflite: - inference_time: 1374.0 - throughput: 727.802037845706 + inference_time: 1366.0 + throughput: 732.0644216691069 estimated_peak_memory_range: - min: 24576 - max: 27865648 + min: 28672 + max: 32615536 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,14 +351,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
116 - job_id: jp0zdez65 + job_id: jp14nl72p job_status: Passed torchscript_onnx_qnn: inference_time: 1660.0 throughput: 602.4096385542168 estimated_peak_memory_range: - min: 0 - max: 5819536 + min: 163840 + max: 5827936 primary_compute_unit: NPU precision: int8 layer_info: @@ -366,7 +366,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: j5we8mw65 + job_id: jgke2no2g job_status: Passed reference_device_info: name: SA8295P ADP @@ -375,13 +375,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:52:48Z' + timestamp: '2024-12-12T01:21:48Z' - torchscript_onnx_tflite: - inference_time: 776.0 - throughput: 1288.659793814433 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: min: 16384 - max: 254977552 + max: 63758376 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,14 +389,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: jp8q6wqxp + job_id: jgdxd98ep job_status: Passed torchscript_onnx_qnn: - inference_time: 966.0 - throughput: 1035.1966873706003 + inference_time: 970.0 + throughput: 1030.9278350515465 estimated_peak_memory_range: - min: 184320 - max: 1544496 + min: 192512 + max: 1318048 primary_compute_unit: NPU precision: int8 layer_info: @@ -404,7 +404,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jg9lk90lg + job_id: j5q6lkz4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -413,13 +413,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:52:50Z' + timestamp: '2024-12-12T01:21:50Z' - torchscript_onnx_tflite: - inference_time: 1277.0 - throughput: 783.0853563038371 + inference_time: 1307.0 + throughput: 765.1109410864575 estimated_peak_memory_range: - min: 45056 - max: 29929776 + min: 16384 + max: 33298112 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,14 +427,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 116 - job_id: jgkeore2g + job_id: j57yewkl5 job_status: Passed torchscript_onnx_qnn: inference_time: 1684.0 throughput: 593.8242280285035 estimated_peak_memory_range: min: 167936 - max: 6055728 + max: 6136672 primary_compute_unit: NPU precision: int8 layer_info: @@ -442,7 +442,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jp147q22p + job_id: jglvyzo85 job_status: Passed reference_device_info: name: SA8775P ADP @@ -451,13 +451,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:52:52Z' + timestamp: '2024-12-12T01:21:52Z' - torchscript_onnx_tflite: - inference_time: 888.0 - throughput: 1126.126126126126 + inference_time: 905.0 + throughput: 1104.9723756906078 estimated_peak_memory_range: min: 20480 - max: 34726576 + max: 41211056 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,14 +465,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 116 - job_id: j5q6z964p + job_id: jp4lyomv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1249.0 - throughput: 800.640512409928 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: min: 163840 - max: 35632688 + max: 36812480 primary_compute_unit: NPU precision: int8 layer_info: @@ -480,7 +480,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jgdx87nep + job_id: j56y8jr0p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -489,13 +489,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:52:54Z' + timestamp: '2024-12-12T01:21:54Z' - torchscript_onnx_qnn: - inference_time: 1126.0 - throughput: 888.0994671403197 + inference_time: 3165.0 + throughput: 315.955766192733 estimated_peak_memory_range: - min: 491520 - max: 491520 + min: 434176 + max: 434176 primary_compute_unit: NPU precision: int8 layer_info: @@ -503,7 +503,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: 
jgjvokvxg + job_id: jpy1owq7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -512,4 +512,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:53:01Z' + timestamp: '2024-12-12T01:21:43Z' diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index 37617b0e..4a9a1154 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ResNet101 universal_assets: - torchscript_onnx_tflite: mmdyr0x0m - torchscript_onnx: mqpz0kr0n + torchscript_onnx_tflite: mnj4x52kn + torchscript_onnx: mmxe72yyn performance_metrics: - torchscript_onnx_tflite: - inference_time: 3424.0 - throughput: 292.05607476635515 + inference_time: 3408.0 + throughput: 293.42723004694835 estimated_peak_memory_range: - min: 16384 - max: 293163344 + min: 81920 + max: 251427952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j57yk0295 + job_id: jprvl7e9g job_status: Passed torchscript_onnx_qnn: - inference_time: 3455.0 - throughput: 289.4356005788712 + inference_time: 3538.0 + throughput: 282.6455624646693 estimated_peak_memory_range: - min: 618496 - max: 209619792 + min: 12288 + max: 167190968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgkeo7vng + job_id: jpv6lnvj5 job_status: Passed torchscript_onnx: - inference_time: 3493.0 - throughput: 286.286859433152 + inference_time: 3564.0 + throughput: 280.58361391694723 estimated_peak_memory_range: - min: 12288 - max: 102569784 + min: 622592 + max: 3360152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jp147yy7p + job_id: j5mn0lzwp job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:38:54Z' + timestamp: '2024-12-11T23:04:17Z' - torchscript_onnx_tflite: - inference_time: 2439.0 - throughput: 410.0041000410004 + inference_time: 2429.0 + throughput: 411.6920543433512 estimated_peak_memory_range: - min: 16384 - max: 35985792 + min: 1265664 + max: 40645328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp4lmkn15 + job_id: jp2krzl4p job_status: Passed torchscript_onnx_qnn: - inference_time: 2495.0 - throughput: 400.80160320641284 + inference_time: 2489.0 + throughput: 401.76777822418643 estimated_peak_memory_range: min: 618496 - max: 37213376 + max: 38403920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5q6ze0op + job_id: jgjvr8exg job_status: Passed torchscript_onnx: - inference_time: 2593.0 - throughput: 385.6536829926726 + inference_time: 2514.0 + throughput: 397.77247414478916 estimated_peak_memory_range: min: 0 - max: 123170624 + max: 124119120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jgdx8eezp + job_id: jgn6zwer5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:38:55Z' + timestamp: '2024-12-11T23:04:18Z' - torchscript_onnx_tflite: - inference_time: 2346.0 - throughput: 426.25745950554136 + inference_time: 2384.0 + throughput: 419.46308724832215 estimated_peak_memory_range: min: 12288 - max: 34376208 + max: 36442000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 147 - job_id: jpxk3n9l5 + job_id: jpy1oy67p job_status: Passed torchscript_onnx_qnn: - inference_time: 2403.0 - throughput: 416.1464835622139 + inference_time: 2406.0 + throughput: 415.6275976724855 estimated_peak_memory_range: - min: 0 - max: 32870416 + min: 614400 + max: 33946768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jglvo66m5 + job_id: jped7nk15 job_status: Passed torchscript_onnx: - inference_time: 2518.0 - throughput: 397.1405877680699 + inference_time: 2512.0 + throughput: 398.0891719745223 estimated_peak_memory_range: min: 0 - max: 47784960 + max: 48620320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: j57yk0095 + job_id: jprvl7y9g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:38:56Z' + timestamp: '2024-12-11T23:04:19Z' - torchscript_onnx_tflite: - inference_time: 3379.0 - throughput: 295.9455460195324 + inference_time: 3397.0 + throughput: 294.3773918163085 estimated_peak_memory_range: min: 16384 - max: 303636728 + max: 251212280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5mnoqe9p + job_id: jp0zmxl65 job_status: Passed torchscript_onnx_qnn: - inference_time: 3290.0 - throughput: 303.951367781155 + inference_time: 3278.0 + throughput: 305.0640634533252 estimated_peak_memory_range: - min: 630784 - max: 2401104 + min: 626688 + max: 1945296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j56yreeyp + job_id: jgz3l0rk5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:38:44Z' + timestamp: '2024-12-11T23:04:08Z' - torchscript_onnx_tflite: - inference_time: 135286.0 - throughput: 7.391747852697248 + inference_time: 135374.0 + throughput: 7.386942839836306 estimated_peak_memory_range: - min: 143360 - max: 32984144 + min: 69632 + max: 33388592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgn6ol0q5 + job_id: jp8qekzxp job_status: Passed torchscript_onnx_qnn: - inference_time: 136421.0 - throughput: 7.3302497416086965 + inference_time: 136379.0 + throughput: 7.332507204188328 estimated_peak_memory_range: - min: 749568 - max: 6625984 + min: 712704 + max: 11223232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgo2okkkp + job_id: jg9lzq4lg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:38:47Z' + timestamp: '2024-12-11T23:04:10Z' - torchscript_onnx_tflite: - inference_time: 3368.0 - throughput: 296.91211401425176 + inference_time: 3396.0 + throughput: 294.4640753828033 estimated_peak_memory_range: - min: 32768 - max: 282505368 + min: 16384 + max: 262026512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jprvo867g + job_id: jgke2k32g job_status: Passed torchscript_onnx_qnn: - inference_time: 3334.0 - throughput: 299.9400119976005 + inference_time: 3317.0 + throughput: 301.4772384684956 estimated_peak_memory_range: - min: 667648 - max: 2359600 + min: 634880 + max: 1931288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jpv6e00r5 + 
job_id: jp14nm82p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:38:48Z' + timestamp: '2024-12-11T23:04:11Z' - torchscript_onnx_tflite: - inference_time: 5700.0 - throughput: 175.43859649122808 + inference_time: 5717.0 + throughput: 174.91691446562882 estimated_peak_memory_range: min: 20480 - max: 19851120 + max: 22100192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp2k40xqp + job_id: j5q6ld34p job_status: Passed torchscript_onnx_qnn: - inference_time: 6342.0 - throughput: 157.67896562598548 + inference_time: 6207.0 + throughput: 161.10842597067827 estimated_peak_memory_range: - min: 659456 - max: 6608432 + min: 655360 + max: 6907184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgjvozzeg + job_id: jgdxdmvep job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:38:49Z' + timestamp: '2024-12-11T23:04:12Z' - torchscript_onnx_tflite: - inference_time: 3381.0 - throughput: 295.77048210588583 + inference_time: 3413.0 + throughput: 292.99736302373276 estimated_peak_memory_range: - min: 20480 - max: 282459368 + min: 24576 + max: 241289728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpy1qrzlp + job_id: jglvyq385 job_status: Passed torchscript_onnx_qnn: - inference_time: 3282.0 - throughput: 304.69226081657524 + inference_time: 3352.0 + throughput: 298.32935560859187 estimated_peak_memory_range: - min: 106496 - max: 1342896 + min: 630784 + max: 2028240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jped8eev5 + job_id: j57ye8dl5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:38:50Z' + timestamp: '2024-12-11T23:04:13Z' - torchscript_onnx_tflite: - inference_time: 6643.0 - throughput: 150.5343971097396 + inference_time: 6657.0 + throughput: 150.2178158329578 estimated_peak_memory_range: - min: 45056 - max: 32552144 + min: 57344 + max: 34439120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp0zd34n5 + job_id: jp3jzrelg job_status: Passed torchscript_onnx_qnn: - inference_time: 6740.0 - throughput: 148.3679525222552 + inference_time: 6703.0 + throughput: 149.18693122482472 estimated_peak_memory_range: - min: 0 - max: 5872560 + min: 618496 + max: 6542992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgz38oox5 + job_id: jp4ly2wv5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:38:51Z' + timestamp: '2024-12-11T23:04:15Z' - torchscript_onnx_tflite: - inference_time: 4762.0 - throughput: 209.99580008399832 + inference_time: 4774.0 + throughput: 209.46795140343528 estimated_peak_memory_range: min: 16384 - max: 25194512 + max: 25912720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp8q602op + job_id: jgo2l93xp job_status: Passed torchscript_onnx_qnn: - inference_time: 4855.0 - throughput: 205.97322348094747 + inference_time: 4808.0 + throughput: 207.98668885191347 estimated_peak_memory_range: - min: 634880 - max: 22673152 + min: 618496 + max: 25051984 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j5we822m5 + job_id: jpxklz115 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:38:53Z' + timestamp: '2024-12-11T23:04:16Z' - torchscript_onnx_qnn: - inference_time: 3461.0 - throughput: 288.93383415197917 + inference_time: 3475.0 + throughput: 287.76978417266184 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp3jxvvng + job_id: j5welr965 job_status: Passed torchscript_onnx: - inference_time: 3480.0 - throughput: 287.35632183908046 + inference_time: 3513.0 + throughput: 284.6569883290635 estimated_peak_memory_range: - min: 90722304 - max: 90722304 + min: 91934720 + max: 91934720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jp4lmkk15 + job_id: jp2krzm4p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:38:57Z' + timestamp: '2024-12-11T23:04:20Z' diff --git a/qai_hub_models/models/resnet101_quantized/evaluate.py b/qai_hub_models/models/resnet101_quantized/evaluate.py index 9aba239a..51824190 100644 --- a/qai_hub_models/models/resnet101_quantized/evaluate.py +++ b/qai_hub_models/models/resnet101_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index 93c7feb0..c5e6b5c2 100644 --- 
a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet101_quantized/info.yaml b/qai_hub_models/models/resnet101_quantized/info.yaml index 1fe4cf48..768c83bf 100644 --- a/qai_hub_models/models/resnet101_quantized/info.yaml +++ b/qai_hub_models/models/resnet101_quantized/info.yaml @@ -11,6 +11,7 @@ description: ResNet101 is a machine learning model that can classify images from tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index affcb13f..fd69429b 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: ResNet101Quantized universal_assets: - torchscript_onnx_tflite: mnwe13pxn + torchscript_onnx_tflite: mngg1oy5n performance_metrics: - torchscript_onnx_tflite: - inference_time: 1169.0 - throughput: 855.4319931565441 + inference_time: 1171.0 + throughput: 853.9709649871904 estimated_peak_memory_range: min: 16384 - max: 58444040 + max: 48146472 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp2k47n4p + job_id: jgke2nrog job_status: Passed torchscript_onnx_qnn: - inference_time: 1378.0 - throughput: 725.6894049346879 + inference_time: 1386.0 + throughput: 721.5007215007215 
estimated_peak_memory_range: - min: 0 - max: 185794976 + min: 225280 + max: 57498120 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jped8r315 + job_id: jp14nlqlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:51:41Z' + timestamp: '2024-12-12T01:20:22Z' - torchscript_onnx_tflite: - inference_time: 883.0 - throughput: 1132.5028312570782 + inference_time: 876.0 + throughput: 1141.552511415525 estimated_peak_memory_range: - min: 16384 - max: 38579264 + min: 0 + max: 39786000 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpy1q407p + job_id: j5q6lk9mp job_status: Passed torchscript_onnx_qnn: - inference_time: 1070.0 - throughput: 934.5794392523364 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - min: 0 - max: 21124512 + min: 163840 + max: 30313440 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgz38xkk5 + job_id: jgdxd97lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:51:43Z' + timestamp: '2024-12-12T01:20:24Z' - torchscript_onnx_tflite: - inference_time: 797.0 - throughput: 1254.7051442910915 + inference_time: 796.0 + throughput: 1256.2814070351758 estimated_peak_memory_range: - min: 32768 - max: 25140128 + min: 0 + max: 27444928 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp0zd1765 + job_id: jglvyzel5 job_status: Passed torchscript_onnx_qnn: - inference_time: 
950.0 - throughput: 1052.6315789473683 + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: min: 159744 - max: 23309696 + max: 26300448 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j5we8m665 + job_id: j5welvx65 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:51:44Z' + timestamp: '2024-12-12T01:20:26Z' - torchscript_onnx_tflite: - inference_time: 4668.0 - throughput: 214.22450728363324 + inference_time: 4516.0 + throughput: 221.43489813994685 estimated_peak_memory_range: min: 12288 - max: 30549680 + max: 33715264 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp8q63vxp + job_id: j56y8jq7p job_status: Passed torchscript_onnx_qnn: - inference_time: 6523.0 - throughput: 153.30369461904033 + inference_time: 6590.0 + throughput: 151.74506828528072 estimated_peak_memory_range: - min: 167936 - max: 8159136 + min: 163840 + max: 7244448 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jg9lk9nlg + job_id: jg9lz18lg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:51:24Z' + timestamp: '2024-12-12T01:20:28Z' - torchscript_onnx_tflite: - inference_time: 17433.0 - throughput: 57.36247346985602 + inference_time: 17260.0 + throughput: 57.93742757821553 estimated_peak_memory_range: - min: 61440 - max: 7549136 + min: 36864 + max: 2339176 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: 
jgkeolm2g + job_id: jp3jz3qzg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:51:04Z' + timestamp: '2024-12-12T01:20:08Z' - torchscript_onnx_tflite: - inference_time: 1161.0 - throughput: 861.3264427217915 + inference_time: 1164.0 + throughput: 859.106529209622 estimated_peak_memory_range: min: 16384 - max: 11505784 + max: 58696016 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j5q6z7o4p + job_id: jgo2l0edp job_status: Passed torchscript_onnx_qnn: inference_time: 1325.0 throughput: 754.7169811320755 estimated_peak_memory_range: - min: 184320 - max: 1462192 + min: 180224 + max: 1355536 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgdx871ep + job_id: jp14nl32p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:51:26Z' + timestamp: '2024-12-12T01:20:30Z' - torchscript_onnx_tflite: - inference_time: 16080.0 - throughput: 62.18905472636816 + inference_time: 16036.0 + throughput: 62.35969069593415 estimated_peak_memory_range: - min: 32768 - max: 22883376 + min: 36864 + max: 23971152 primary_compute_unit: NPU precision: int8 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jglvo0r85 + job_id: jpv6lozm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 16755.0 - throughput: 59.68367651447329 + inference_time: 16766.0 + throughput: 59.64451866873434 estimated_peak_memory_range: - min: 208896 - max: 7502896 + min: 126976 + max: 10521024 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 246 - job_id: jp4lmjrv5 + job_id: j57yew6l5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:51:30Z' + timestamp: '2024-12-12T01:20:33Z' - torchscript_onnx_tflite: - inference_time: 1170.0 - throughput: 854.7008547008547 + inference_time: 1163.0 + throughput: 859.8452278589854 estimated_peak_memory_range: - min: 16384 - max: 58987432 + min: 20480 + max: 59002672 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j56yr3l0p + job_id: jgjvrmk8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1333.0 - throughput: 750.1875468867216 + inference_time: 1327.0 + throughput: 753.5795026375282 estimated_peak_memory_range: - min: 176128 - max: 1426760 + min: 180224 + max: 1391320 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpxk3eo15 + job_id: jp4lyo8v5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:51:31Z' + timestamp: '2024-12-12T01:20:35Z' - torchscript_onnx_tflite: inference_time: 1779.0 throughput: 562.1135469364812 estimated_peak_memory_range: - min: 16384 - max: 23329648 + min: 32768 + max: 24826960 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp3jx42lg + job_id: jped71405 job_status: Passed torchscript_onnx_qnn: - inference_time: 2255.0 - throughput: 443.4589800443459 + inference_time: 2063.0 + throughput: 484.7309743092584 estimated_peak_memory_range: min: 0 - max: 5917840 + max: 5906944 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 246 - job_id: j5mnovxwp + job_id: jpxkljm15 job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:51:33Z' + timestamp: '2024-12-12T01:20:37Z' - torchscript_onnx_tflite: - inference_time: 1168.0 - throughput: 856.1643835616438 + inference_time: 1169.0 + throughput: 855.4319931565441 estimated_peak_memory_range: - min: 24576 - max: 8249008 + min: 32768 + max: 184140888 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgo2o1qxp + job_id: jgz3l9v65 job_status: Passed torchscript_onnx_qnn: - inference_time: 1327.0 - throughput: 753.5795026375282 + inference_time: 1331.0 + throughput: 751.3148009015778 estimated_peak_memory_range: - min: 192512 - max: 1516512 + min: 176128 + max: 1401952 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgn6orvr5 + job_id: j5mn024wp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:51:35Z' + timestamp: '2024-12-12T01:20:39Z' - torchscript_onnx_tflite: - inference_time: 1457.0 - throughput: 686.3417982155113 + inference_time: 1447.0 + throughput: 691.0850034554251 estimated_peak_memory_range: - min: 4096 - max: 23366464 + min: 16384 + max: 27099344 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpv6e1xj5 + job_id: j5welvmj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1847.0 - throughput: 541.4185165132648 + inference_time: 1822.0 + throughput: 548.847420417124 estimated_peak_memory_range: - min: 0 - max: 5797152 + min: 94208 + max: 5964576 
primary_compute_unit: NPU precision: int8 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jprvo139g + job_id: jgn6zyxr5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:51:37Z' + timestamp: '2024-12-12T01:20:41Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1359.0 + throughput: 735.8351729212657 estimated_peak_memory_range: min: 16384 - max: 39949136 + max: 43869008 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgjvo04xg + job_id: jg9lz19vg job_status: Passed torchscript_onnx_qnn: - inference_time: 1583.0 - throughput: 631.7119393556538 + inference_time: 1603.0 + throughput: 623.8303181534623 estimated_peak_memory_range: - min: 163840 - max: 24762528 + min: 188416 + max: 27355088 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp2k43y4p + job_id: jprvlq99g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:51:39Z' + timestamp: '2024-12-12T01:20:43Z' - torchscript_onnx_qnn: - inference_time: 1326.0 - throughput: 754.1478129713424 + inference_time: 1314.0 + throughput: 761.03500761035 estimated_peak_memory_range: - min: 393216 - max: 393216 + min: 339968 + max: 339968 primary_compute_unit: NPU precision: int8 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j57ykvrl5 + job_id: jgdxd90ep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - 
timestamp: '2024-11-26T03:51:46Z' + timestamp: '2024-12-12T01:20:31Z' diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index bad1f34d..5b4642df 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ResNet18 universal_assets: - torchscript_onnx_tflite: mn41rorzn - torchscript_onnx: mnwezvwxn + torchscript_onnx_tflite: mnw8e7g3n + torchscript_onnx: mnz1v9ozq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1387.0 - throughput: 720.9805335255949 + inference_time: 1393.0 + throughput: 717.8750897343862 estimated_peak_memory_range: - min: 16384 - max: 134347304 + min: 28672 + max: 124157856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jped8edo5 + job_id: jgdxdmqep job_status: Passed torchscript_onnx_qnn: - inference_time: 1466.0 - throughput: 682.1282401091405 + inference_time: 1469.0 + throughput: 680.7351940095303 estimated_peak_memory_range: - min: 348160 - max: 71965008 + min: 630784 + max: 3965792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jprvo8v0g + job_id: jp8qekmxp job_status: Passed torchscript_onnx: - inference_time: 1318.0 - throughput: 758.7253414264036 + inference_time: 1331.0 + throughput: 751.3148009015778 estimated_peak_memory_range: - min: 372736 - max: 2621544 + min: 20480 + max: 26570936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jpv6e0qk5 + job_id: j5welrq65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:38:05Z' + timestamp: '2024-12-11T23:03:29Z' - 
torchscript_onnx_tflite: - inference_time: 970.0 - throughput: 1030.9278350515465 + inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: min: 16384 - max: 16011216 + max: 17879792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jgz38o3o5 + job_id: j57ye8ll5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1007.0 - throughput: 993.0486593843099 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 0 - max: 16569360 + min: 618496 + max: 18914112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp2k40krp + job_id: jgke2kq2g job_status: Passed torchscript_onnx: - inference_time: 970.0 - throughput: 1030.9278350515465 + inference_time: 967.0 + throughput: 1034.126163391934 estimated_peak_memory_range: min: 0 - max: 29945616 + max: 32293488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgjvozdvg + job_id: jg9lzqwlg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:38:06Z' + timestamp: '2024-12-11T23:03:30Z' - torchscript_onnx_tflite: - inference_time: 932.0 - throughput: 1072.961373390558 + inference_time: 958.0 + throughput: 1043.8413361169103 estimated_peak_memory_range: min: 12288 - max: 11889776 + max: 15501840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5we82e35 + job_id: jp4ly2dv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 984.0 - throughput: 1016.260162601626 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: 
min: 0 - max: 10859232 + max: 14779920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jpy1qr18p + job_id: j5q6ldr4p job_status: Passed torchscript_onnx: inference_time: 973.0 throughput: 1027.749229188078 estimated_peak_memory_range: min: 0 - max: 17243968 + max: 19905200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jgz38o2o5 + job_id: jp14nme2p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:38:07Z' + timestamp: '2024-12-11T23:03:31Z' - torchscript_onnx_tflite: - inference_time: 1383.0 - throughput: 723.0657989877079 + inference_time: 1391.0 + throughput: 718.9072609633357 estimated_peak_memory_range: min: 16384 - max: 134440752 + max: 124074896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jg9lkjlwg + job_id: jpxklz615 job_status: Passed torchscript_onnx_qnn: - inference_time: 1315.0 - throughput: 760.4562737642585 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: - min: 647168 - max: 2290704 + min: 626688 + max: 1858760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp0zd3z95 + job_id: jglvyq285 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:37:56Z' + timestamp: '2024-12-11T23:03:19Z' - torchscript_onnx_tflite: - inference_time: 40717.0 - throughput: 24.55976619102586 + inference_time: 40720.0 + throughput: 24.557956777996072 estimated_peak_memory_range: - min: 
65536 - max: 11109328 + min: 53248 + max: 13511120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jp147y48p + job_id: j5mn0l6wp job_status: Passed torchscript_onnx_qnn: - inference_time: 41420.0 - throughput: 24.142926122646063 + inference_time: 41362.0 + throughput: 24.176780619892654 estimated_peak_memory_range: - min: 720896 - max: 6427712 + min: 565248 + max: 11219936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jgkeo7ewg + job_id: jp3jzr1lg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:37:58Z' + timestamp: '2024-12-11T23:03:21Z' - torchscript_onnx_tflite: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1391.0 + throughput: 718.9072609633357 estimated_peak_memory_range: min: 16384 - max: 134592864 + max: 124789152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jgdx8exrp + job_id: jgn6zwmr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1327.0 + throughput: 753.5795026375282 estimated_peak_memory_range: - min: 626688 - max: 1923104 + min: 630784 + max: 2029080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j5q6ze6np + job_id: jgo2l9nxp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:37:59Z' + timestamp: '2024-12-11T23:03:22Z' - torchscript_onnx_tflite: - inference_time: 2476.0 - throughput: 403.8772213247173 + inference_time: 
2486.0 + throughput: 402.2526146419952 estimated_peak_memory_range: min: 16384 - max: 8950608 + max: 8752592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jp4lmkl85 + job_id: jprvl729g job_status: Passed torchscript_onnx_qnn: - inference_time: 2452.0 - throughput: 407.8303425774878 + inference_time: 2906.0 + throughput: 344.1156228492774 estimated_peak_memory_range: min: 0 - max: 6018240 + max: 6064112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jglvo6vj5 + job_id: jpv6lnrj5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:38:00Z' + timestamp: '2024-12-11T23:03:24Z' - torchscript_onnx_tflite: inference_time: 1389.0 throughput: 719.9424046076314 estimated_peak_memory_range: min: 16384 - max: 134524016 + max: 135145952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jpxk3nk35 + job_id: jp2krz94p job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: - min: 16384 - max: 1299112 + min: 630784 + max: 2205208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j56yrey6p + job_id: jgjvr82xg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:38:01Z' + timestamp: '2024-12-11T23:03:25Z' - torchscript_onnx_tflite: - inference_time: 2559.0 - throughput: 390.77764751856193 + inference_time: 2562.0 + throughput: 
390.32006245121 estimated_peak_memory_range: min: 16384 - max: 11843904 + max: 13688016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j5mnoqndp + job_id: jpy1oyj7p job_status: Passed torchscript_onnx_qnn: - inference_time: 2660.0 - throughput: 375.9398496240602 + inference_time: 2698.0 + throughput: 370.64492216456637 estimated_peak_memory_range: min: 0 - max: 5931440 + max: 5985280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp3jxvj3g + job_id: jped7nw15 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:38:03Z' + timestamp: '2024-12-11T23:03:26Z' - torchscript_onnx_tflite: - inference_time: 1952.0 - throughput: 512.2950819672132 + inference_time: 1954.0 + throughput: 511.77072671443193 estimated_peak_memory_range: min: 16384 - max: 16154400 + max: 18431696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jgn6ol6k5 + job_id: jp0zmx265 job_status: Passed torchscript_onnx_qnn: - inference_time: 1984.0 - throughput: 504.03225806451616 + inference_time: 1987.0 + throughput: 503.27126321087064 estimated_peak_memory_range: min: 618496 - max: 18698224 + max: 19994960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jgo2ok2qp + job_id: jgz3l0jk5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:38:04Z' + timestamp: '2024-12-11T23:03:28Z' - torchscript_onnx_qnn: - inference_time: 1437.0 - throughput: 695.8942240779402 + inference_time: 
1456.0 + throughput: 686.8131868131868 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp8q60qkp + job_id: j56y80z0p + job_status: Passed + torchscript_onnx: + inference_time: 1348.0 + throughput: 741.839762611276 + estimated_peak_memory_range: + min: 25538560 + max: 25538560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 55 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 55 + job_id: jgdxdmoep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:38:08Z' + timestamp: '2024-12-11T23:03:32Z' diff --git a/qai_hub_models/models/resnet18_quantized/evaluate.py b/qai_hub_models/models/resnet18_quantized/evaluate.py index 11452ab1..d2487283 100644 --- a/qai_hub_models/models/resnet18_quantized/evaluate.py +++ b/qai_hub_models/models/resnet18_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index a88da624..91099841 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet18_quantized/info.yaml b/qai_hub_models/models/resnet18_quantized/info.yaml index d8ee81eb..8ee2364c 100644 --- a/qai_hub_models/models/resnet18_quantized/info.yaml +++ 
b/qai_hub_models/models/resnet18_quantized/info.yaml @@ -11,6 +11,7 @@ description: ResNet18 is a machine learning model that can classify images from tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index 219003a0..4cd64b04 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -50,15 +50,14 @@ aggregated: models: - name: ResNet18Quantized universal_assets: - torchscript_onnx_tflite: mnlvgxzwm - torchscript_onnx: mn1z8kl8m + torchscript_onnx_tflite: mqp3z27vm performance_metrics: - torchscript_onnx_tflite: - inference_time: 403.0 - throughput: 2481.3895781637716 + inference_time: 404.0 + throughput: 2475.2475247524753 estimated_peak_memory_range: min: 12288 - max: 155739504 + max: 155866472 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp147d0lp + job_id: jgdxd9vlp job_status: Passed torchscript_onnx_qnn: - inference_time: 632.0 - throughput: 1582.2784810126582 + inference_time: 629.0 + throughput: 1589.825119236884 estimated_peak_memory_range: - min: 20480 - max: 8118944 + min: 12288 + max: 99649024 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j5q6z7kmp + job_id: j56y8j37p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:50:25Z' + timestamp: '2024-12-12T01:19:09Z' - torchscript_onnx_tflite: - inference_time: 308.0 - throughput: 3246.753246753247 + inference_time: 311.0 + 
throughput: 3215.434083601286 estimated_peak_memory_range: min: 12288 - max: 15454032 + max: 14267232 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j57ykjwr5 + job_id: j57yewjr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 480.0 - throughput: 2083.3333333333335 + inference_time: 478.0 + throughput: 2092.050209205021 estimated_peak_memory_range: - min: 0 - max: 13722528 + min: 159744 + max: 16497104 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jglvo0zl5 + job_id: jp3jz34zg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:50:27Z' + timestamp: '2024-12-12T01:19:10Z' - torchscript_onnx_tflite: - inference_time: 303.0 - throughput: 3300.3300330033003 + inference_time: 293.0 + throughput: 3412.9692832764504 estimated_peak_memory_range: - min: 8192 - max: 9894304 + min: 12288 + max: 13592928 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp4lmxol5 + job_id: jpxklj795 job_status: Passed torchscript_onnx_qnn: - inference_time: 441.0 - throughput: 2267.573696145125 + inference_time: 439.0 + throughput: 2277.904328018223 estimated_peak_memory_range: min: 0 - max: 10458352 + max: 9841232 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j56yr3j7p + job_id: jgo2l01dp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:50:29Z' + timestamp: '2024-12-12T01:19:12Z' - torchscript_onnx_tflite: - 
inference_time: 1366.0 - throughput: 732.0644216691069 + inference_time: 1359.0 + throughput: 735.8351729212657 estimated_peak_memory_range: - min: 12288 - max: 13302864 + min: 0 + max: 17098768 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpxk37j95 + job_id: j5mn02wqp job_status: Passed torchscript_onnx_qnn: - inference_time: 2024.0 - throughput: 494.0711462450593 + inference_time: 2043.0 + throughput: 489.47626040137055 estimated_peak_memory_range: min: 12288 - max: 7806384 + max: 6951648 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp3jx43zg + job_id: jpv6lo1m5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -204,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:50:09Z' + timestamp: '2024-12-12T01:19:14Z' - torchscript_onnx_tflite: - inference_time: 7058.0 - throughput: 141.68319637291017 + inference_time: 7140.0 + throughput: 140.0560224089636 estimated_peak_memory_range: - min: 12288 - max: 7220432 + min: 28672 + max: 2434624 primary_compute_unit: NPU precision: int8 layer_info: @@ -218,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5mnow2qp + job_id: jgn6zy9m5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -227,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:49:50Z' + timestamp: '2024-12-12T01:18:55Z' - torchscript_onnx_tflite: - inference_time: 408.0 - throughput: 2450.9803921568628 + inference_time: 407.0 + throughput: 2457.002457002457 estimated_peak_memory_range: min: 28672 - max: 8338616 + max: 8461904 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgn6o9ym5 + job_id: 
jprvlq4eg job_status: Passed torchscript_onnx_qnn: - inference_time: 601.0 - throughput: 1663.8935108153078 + inference_time: 602.0 + throughput: 1661.1295681063123 estimated_peak_memory_range: - min: 184320 - max: 1392712 + min: 176128 + max: 1805200 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgo2o10dp + job_id: jgjvrm08g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -265,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:50:11Z' + timestamp: '2024-12-12T01:19:16Z' - torchscript_onnx_tflite: inference_time: 4370.0 throughput: 228.83295194508008 estimated_peak_memory_range: min: 16384 - max: 10217440 + max: 13853664 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jprvo4qeg + job_id: jp2kr67mp job_status: Passed torchscript_onnx_qnn: - inference_time: 5053.0 - throughput: 197.90223629527014 + inference_time: 5005.0 + throughput: 199.8001998001998 estimated_peak_memory_range: - min: 98304 - max: 5601808 + min: 94208 + max: 10583200 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgjvo0m8g + job_id: jgz3l9x65 job_status: Passed reference_device_info: name: SA7255P ADP @@ -303,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:50:14Z' + timestamp: '2024-12-12T01:19:20Z' - torchscript_onnx_tflite: - inference_time: 407.0 - throughput: 2457.002457002457 + inference_time: 410.0 + throughput: 2439.0243902439024 estimated_peak_memory_range: - min: 28672 - max: 8511952 + min: 16384 + max: 8808816 primary_compute_unit: NPU precision: int8 layer_info: @@ -317,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp2k476mp + 
job_id: jpy1ow44p job_status: Passed torchscript_onnx_qnn: - inference_time: 602.0 - throughput: 1661.1295681063123 + inference_time: 596.0 + throughput: 1677.8523489932886 estimated_peak_memory_range: - min: 188416 - max: 1838928 + min: 20480 + max: 1513424 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jped8r105 + job_id: j5welvdj5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -341,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:50:16Z' + timestamp: '2024-12-12T01:19:22Z' - torchscript_onnx_tflite: - inference_time: 777.0 - throughput: 1287.001287001287 + inference_time: 797.0 + throughput: 1254.7051442910915 estimated_peak_memory_range: - min: 12288 - max: 9478944 + min: 16384 + max: 9178640 primary_compute_unit: NPU precision: int8 layer_info: @@ -355,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpy1q4w4p + job_id: jp8qe938p job_status: Passed torchscript_onnx_qnn: - inference_time: 1094.0 - throughput: 914.0767824497258 + inference_time: 1693.0 + throughput: 590.6674542232723 estimated_peak_memory_range: min: 0 - max: 5987040 + max: 5709152 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jgz38x965 + job_id: jg9lz13vg job_status: Passed reference_device_info: name: SA8295P ADP @@ -379,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:50:18Z' + timestamp: '2024-12-12T01:19:23Z' - torchscript_onnx_tflite: - inference_time: 410.0 - throughput: 2439.0243902439024 + inference_time: 406.0 + throughput: 2463.054187192118 estimated_peak_memory_range: min: 16384 - max: 141358216 + max: 8475296 primary_compute_unit: NPU precision: int8 layer_info: @@ -393,14 +392,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 41 - job_id: jp0zd1qe5 + job_id: jgke2nlog job_status: Passed torchscript_onnx_qnn: - inference_time: 601.0 - throughput: 1663.8935108153078 + inference_time: 604.0 + throughput: 1655.6291390728477 estimated_peak_memory_range: min: 180224 - max: 1566568 + max: 1364480 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: j5we8dvj5 + job_id: jp14nldlp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -417,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:50:20Z' + timestamp: '2024-12-12T01:19:25Z' - torchscript_onnx_tflite: - inference_time: 688.0 - throughput: 1453.4883720930231 + inference_time: 667.0 + throughput: 1499.2503748125937 estimated_peak_memory_range: - min: 16384 - max: 8726448 + min: 12288 + max: 13917200 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp8q6398p + job_id: j5q6lk7mp job_status: Passed torchscript_onnx_qnn: - inference_time: 1031.0 - throughput: 969.9321047526673 + inference_time: 1092.0 + throughput: 915.7509157509157 estimated_peak_memory_range: - min: 167936 - max: 5980384 + min: 208896 + max: 6163888 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jg9lk31vg + job_id: jgdxd9rlp job_status: Passed reference_device_info: name: SA8775P ADP @@ -455,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:50:21Z' + timestamp: '2024-12-12T01:19:27Z' - torchscript_onnx_tflite: - inference_time: 479.0 - throughput: 2087.6826722338205 + inference_time: 472.0 + throughput: 2118.64406779661 estimated_peak_memory_range: min: 20480 - max: 17136464 + max: 18388400 primary_compute_unit: NPU precision: int8 
layer_info: @@ -469,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgkeolnog + job_id: jglvyz0l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 707.0 - throughput: 1414.4271570014143 + inference_time: 702.0 + throughput: 1424.5014245014245 estimated_peak_memory_range: - min: 163840 - max: 13886000 + min: 167936 + max: 18350176 primary_compute_unit: NPU precision: int8 layer_info: @@ -484,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jp147dllp + job_id: j57yewvr5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -493,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:50:23Z' + timestamp: '2024-12-12T01:19:29Z' - torchscript_onnx_qnn: - inference_time: 678.0 - throughput: 1474.9262536873157 + inference_time: 688.0 + throughput: 1453.4883720930231 estimated_peak_memory_range: - min: 540672 - max: 540672 + min: 528384 + max: 528384 primary_compute_unit: NPU precision: int8 layer_info: @@ -507,22 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 54 - job_id: jpv6e1om5 - job_status: Passed - torchscript_onnx: - inference_time: 109769.0 - throughput: 9.110040175277174 - estimated_peak_memory_range: - min: 121602048 - max: 121602048 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 143 - layers_on_gpu: 0 - layers_on_cpu: 20 - total_layers: 163 - job_id: jgdx8rlep + job_id: jped71r05 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -531,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:50:31Z' + timestamp: '2024-12-12T01:19:18Z' diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index 5386ae14..222b816b 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -44,15 +44,15 @@ aggregated: 
models: - name: ResNet50 universal_assets: - torchscript_onnx_tflite: mmdyr0kkm - torchscript_onnx: mmdyr06wm + torchscript_onnx_tflite: mn1wz7kzm + torchscript_onnx: mn7xl1loq performance_metrics: - torchscript_onnx_tflite: - inference_time: 2270.0 - throughput: 440.52863436123346 + inference_time: 2279.0 + throughput: 438.7889425186485 estimated_peak_memory_range: - min: 20480 - max: 244169376 + min: 32768 + max: 255408272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jglvo4rj5 + job_id: jg9lzq0vg job_status: Passed torchscript_onnx_qnn: - inference_time: 2386.0 - throughput: 419.11148365465215 + inference_time: 2394.0 + throughput: 417.7109440267335 estimated_peak_memory_range: - min: 36864 - max: 181343368 + min: 49152 + max: 181399288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp147yz8p + job_id: jpy1oyr4p job_status: Passed torchscript_onnx: - inference_time: 2297.0 - throughput: 435.35045711798 + inference_time: 2281.0 + throughput: 438.4042086804033 estimated_peak_memory_range: - min: 618496 - max: 2324504 + min: 12288 + max: 77751288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp8q60ykp + job_id: jped7ne05 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:37:21Z' + timestamp: '2024-12-11T23:02:44Z' - torchscript_onnx_tflite: - inference_time: 1597.0 - throughput: 626.1740763932373 + inference_time: 1621.0 + throughput: 616.9031462060457 estimated_peak_memory_range: min: 16384 - max: 27973600 + max: 27133344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 79 - job_id: j56yr2l6p + job_id: jp14nm2lp job_status: Passed torchscript_onnx_qnn: - inference_time: 1710.0 - throughput: 584.7953216374269 + inference_time: 1694.0 + throughput: 590.318772136954 estimated_peak_memory_range: min: 0 - max: 25139424 + max: 28795792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdx8e1rp + job_id: jp0zmx3e5 job_status: Passed torchscript_onnx: - inference_time: 1722.0 - throughput: 580.7200929152149 + inference_time: 1738.0 + throughput: 575.3739930955121 estimated_peak_memory_range: - min: 618496 - max: 84614848 + min: 0 + max: 84925600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgkeo7xwg + job_id: jgz3l0o65 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:37:22Z' + timestamp: '2024-12-11T23:02:45Z' - torchscript_onnx_tflite: - inference_time: 1593.0 - throughput: 627.7463904582548 + inference_time: 1553.0 + throughput: 643.915003219575 estimated_peak_memory_range: min: 12288 - max: 24217792 + max: 26634112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp3jxn23g + job_id: jgdxdmnlp job_status: Passed torchscript_onnx_qnn: - inference_time: 1635.0 - throughput: 611.6207951070336 + inference_time: 1409.0 + throughput: 709.7232079488999 estimated_peak_memory_range: min: 0 - max: 23588544 + max: 24567568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,7 +181,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j57yk0rv5 + job_id: jp8qek08p + job_status: Passed + torchscript_onnx: + inference_time: 1654.0 + throughput: 604.5949214026602 + 
estimated_peak_memory_range: + min: 0 + max: 34134704 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 128 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 128 + job_id: j5welr2j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -190,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:37:23Z' + timestamp: '2024-12-11T23:02:46Z' - torchscript_onnx_tflite: - inference_time: 2271.0 - throughput: 440.33465433729634 + inference_time: 2266.0 + throughput: 441.306266548985 estimated_peak_memory_range: - min: 24576 - max: 255033552 + min: 20480 + max: 255649616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -204,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgo2ozqqp + job_id: j57ye80r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2172.0 - throughput: 460.4051565377532 + inference_time: 2183.0 + throughput: 458.0852038479157 estimated_peak_memory_range: - min: 659456 - max: 2042224 + min: 643072 + max: 2160960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp4lmkr85 + job_id: jgke2k7og job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -228,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:37:11Z' + timestamp: '2024-12-11T23:02:35Z' - torchscript_onnx_tflite: - inference_time: 76242.0 - throughput: 13.116130216940794 + inference_time: 76255.0 + throughput: 13.11389417087404 estimated_peak_memory_range: - min: 69632 - max: 22030176 + min: 77824 + max: 23717680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpv6e0kk5 + job_id: jp4ly2kl5 job_status: Passed torchscript_onnx_qnn: - inference_time: 77456.0 - throughput: 
12.91055567031605 + inference_time: 77474.0 + throughput: 12.907556083331182 estimated_peak_memory_range: - min: 761856 - max: 6317888 + min: 671744 + max: 10949600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5mnoqxdp + job_id: jglvyq6l5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:37:14Z' + timestamp: '2024-12-11T23:02:37Z' - torchscript_onnx_tflite: - inference_time: 2273.0 - throughput: 439.9472063352398 + inference_time: 2275.0 + throughput: 439.56043956043953 estimated_peak_memory_range: - min: 2428928 - max: 257569264 + min: 16384 + max: 255351104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgjvoznvg + job_id: jpxklzn95 job_status: Passed torchscript_onnx_qnn: - inference_time: 2189.0 - throughput: 456.82960255824577 + inference_time: 2186.0 + throughput: 457.45654162854527 estimated_peak_memory_range: - min: 626688 - max: 1939056 + min: 638976 + max: 1910008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgn6olvk5 + job_id: j56y80e7p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:37:15Z' + timestamp: '2024-12-11T23:02:38Z' - torchscript_onnx_tflite: - inference_time: 3761.0 - throughput: 265.8867322520606 + inference_time: 3759.0 + throughput: 266.02819898909286 estimated_peak_memory_range: min: 16384 - max: 15536336 + max: 18009760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jped8emo5 + job_id: 
j5mn0lqqp job_status: Passed torchscript_onnx_qnn: - inference_time: 4067.0 - throughput: 245.88148512417015 + inference_time: 4062.0 + throughput: 246.18414574101428 estimated_peak_memory_range: min: 0 - max: 6011120 + max: 6047888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jprvo830g + job_id: jp3jzrvzg job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:37:16Z' + timestamp: '2024-12-11T23:02:39Z' - torchscript_onnx_tflite: - inference_time: 2270.0 - throughput: 440.52863436123346 + inference_time: 2275.0 + throughput: 439.56043956043953 estimated_peak_memory_range: - min: 16384 - max: 255324136 + min: 24576 + max: 255211032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgz38odo5 + job_id: jgn6zwlm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2179.0 - throughput: 458.9261128958238 + inference_time: 2190.0 + throughput: 456.62100456621005 estimated_peak_memory_range: - min: 634880 - max: 2185552 + min: 675840 + max: 2167024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp2k40yrp + job_id: jgo2l9kdp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:37:17Z' + timestamp: '2024-12-11T23:02:41Z' - torchscript_onnx_tflite: - inference_time: 4111.0 - throughput: 243.24981756263682 + inference_time: 4107.0 + throughput: 243.48672997321646 estimated_peak_memory_range: min: 16384 - max: 22258144 + max: 24197312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,14 +409,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 79 - job_id: j5we82635 + job_id: jprvl78eg job_status: Passed torchscript_onnx_qnn: - inference_time: 4234.0 - throughput: 236.18327822390174 + inference_time: 4210.0 + throughput: 237.52969121140143 estimated_peak_memory_range: - min: 552960 - max: 6521552 + min: 618496 + max: 6407984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpy1qr38p + job_id: jpv6ln0m5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:37:19Z' + timestamp: '2024-12-11T23:02:42Z' - torchscript_onnx_tflite: - inference_time: 3122.0 - throughput: 320.30749519538756 + inference_time: 3108.0 + throughput: 321.75032175032175 estimated_peak_memory_range: - min: 20480 - max: 20932480 + min: 16384 + max: 23514224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jg9lkjnwg + job_id: jp2krz0mp job_status: Passed torchscript_onnx_qnn: - inference_time: 3141.0 - throughput: 318.3699458771092 + inference_time: 3133.0 + throughput: 319.1828917969997 estimated_peak_memory_range: - min: 622592 - max: 18517312 + min: 618496 + max: 21331184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp0zd3095 + job_id: jgjvr8z8g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:37:20Z' + timestamp: '2024-12-11T23:02:43Z' - torchscript_onnx_qnn: - inference_time: 2296.0 - throughput: 435.54006968641113 + inference_time: 2291.0 + throughput: 436.4906154517678 estimated_peak_memory_range: min: 602112 max: 602112 @@ -470,14 +485,14 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 126 - job_id: jpxk3no35 + job_id: j5q6ldemp job_status: Passed torchscript_onnx: - inference_time: 2326.0 - throughput: 429.9226139294927 + inference_time: 2315.0 + throughput: 431.9654427645788 estimated_peak_memory_range: - min: 52383744 - max: 52383744 + min: 52400128 + max: 52400128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jglvo6mj5 + job_id: jg9lzqjvg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:37:24Z' + timestamp: '2024-12-11T23:02:47Z' diff --git a/qai_hub_models/models/resnet50_quantized/evaluate.py b/qai_hub_models/models/resnet50_quantized/evaluate.py index 42a16a6e..b97d88ce 100644 --- a/qai_hub_models/models/resnet50_quantized/evaluate.py +++ b/qai_hub_models/models/resnet50_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/resnet50_quantized/export.py b/qai_hub_models/models/resnet50_quantized/export.py index 7fbee537..6bb57c8b 100644 --- a/qai_hub_models/models/resnet50_quantized/export.py +++ b/qai_hub_models/models/resnet50_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnet50_quantized/info.yaml b/qai_hub_models/models/resnet50_quantized/info.yaml index 7b926728..0efc2908 100644 --- a/qai_hub_models/models/resnet50_quantized/info.yaml +++ 
b/qai_hub_models/models/resnet50_quantized/info.yaml @@ -11,6 +11,7 @@ description: ResNet50 is a machine learning model that can classify images from tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1512.03385 research_paper_title: Deep Residual Learning for Image Recognition license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/resnet50_quantized/perf.yaml b/qai_hub_models/models/resnet50_quantized/perf.yaml index d5384102..5b9117f8 100644 --- a/qai_hub_models/models/resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/resnet50_quantized/perf.yaml @@ -47,14 +47,14 @@ aggregated: models: - name: ResNet50Quantized universal_assets: - torchscript_onnx_tflite: mmx7z35jq + torchscript_onnx_tflite: mq8dkjrzm performance_metrics: - torchscript_onnx_tflite: - inference_time: 789.0 - throughput: 1267.427122940431 + inference_time: 780.0 + throughput: 1282.051282051282 estimated_peak_memory_range: - min: 20480 - max: 46135056 + min: 16384 + max: 35002504 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j56yr3k7p + job_id: jpv6lovm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1010.0 - throughput: 990.0990099009902 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: - min: 16384 - max: 33048840 + min: 172032 + max: 33243504 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp4lmxql5 + job_id: jgn6zyem5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:49:15Z' + timestamp: '2024-12-12T01:17:59Z' - torchscript_onnx_tflite: - inference_time: 597.0 - throughput: 1675.0418760469013 + inference_time: 599.0 + throughput: 1669.449081803005 
estimated_peak_memory_range: min: 12288 - max: 23479216 + max: 28676176 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp3jx4yzg + job_id: jgjvrme8g job_status: Passed torchscript_onnx_qnn: - inference_time: 755.0 - throughput: 1324.5033112582782 + inference_time: 753.0 + throughput: 1328.0212483399735 estimated_peak_memory_range: min: 0 - max: 17546832 + max: 17032192 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk37v95 + job_id: jprvlqyeg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:49:16Z' + timestamp: '2024-12-12T01:18:01Z' - torchscript_onnx_tflite: inference_time: 544.0 throughput: 1838.235294117647 estimated_peak_memory_range: - min: 8192 - max: 17420640 + min: 12288 + max: 19782400 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgo2o1jdp + job_id: jped71k05 job_status: Passed torchscript_onnx_qnn: inference_time: 726.0 throughput: 1377.4104683195592 estimated_peak_memory_range: min: 0 - max: 17557904 + max: 20136416 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mnowrqp + job_id: jp2kr6mmp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:49:18Z' + timestamp: '2024-12-12T01:18:03Z' - torchscript_onnx_tflite: - inference_time: 2713.0 - throughput: 368.59565057132323 + inference_time: 2769.0 + throughput: 361.14120621162874 estimated_peak_memory_range: min: 12288 - max: 22768096 + 
max: 26141680 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpv6e1jm5 + job_id: jgz3l9r65 job_status: Passed torchscript_onnx_qnn: - inference_time: 3965.0 - throughput: 252.20680958385876 + inference_time: 4091.0 + throughput: 244.43901246638964 estimated_peak_memory_range: - min: 204800 - max: 7992416 + min: 208896 + max: 7332032 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6o92m5 + job_id: jpy1owd4p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:48:58Z' + timestamp: '2024-12-12T01:18:05Z' - torchscript_onnx_tflite: - inference_time: 11577.0 - throughput: 86.37816360024186 + inference_time: 11525.0 + throughput: 86.76789587852494 estimated_peak_memory_range: - min: 36864 - max: 2455912 + min: 16384 + max: 3062184 primary_compute_unit: NPU precision: int8 layer_info: @@ -214,7 +214,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgjvo0j8g + job_id: j5welvqj5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -223,13 +223,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:48:38Z' + timestamp: '2024-12-12T01:17:45Z' - torchscript_onnx_tflite: - inference_time: 786.0 - throughput: 1272.264631043257 + inference_time: 787.0 + throughput: 1270.6480304955528 estimated_peak_memory_range: - min: 12288 - max: 35305560 + min: 24576 + max: 34447472 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,14 +237,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jped8rj05 + job_id: jg9lz1wvg job_status: Passed torchscript_onnx_qnn: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 949.0 + 
throughput: 1053.740779768177 estimated_peak_memory_range: min: 180224 - max: 1740832 + max: 1798912 primary_compute_unit: NPU precision: int8 layer_info: @@ -252,7 +252,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprvo4keg + job_id: jp0zmqre5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -261,13 +261,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:49:00Z' + timestamp: '2024-12-12T01:18:06Z' - torchscript_onnx_tflite: - inference_time: 8838.0 - throughput: 113.14777098891152 + inference_time: 8798.0 + throughput: 113.66219595362583 estimated_peak_memory_range: - min: 16384 - max: 15906128 + min: 397312 + max: 18581680 primary_compute_unit: NPU precision: int8 layer_info: @@ -275,14 +275,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgz38x165 + job_id: jp14nlelp job_status: Passed torchscript_onnx_qnn: - inference_time: 9593.0 - throughput: 104.24267695194412 + inference_time: 9564.0 + throughput: 104.55876202425763 estimated_peak_memory_range: - min: 98304 - max: 5688112 + min: 102400 + max: 10499840 primary_compute_unit: NPU precision: int8 layer_info: @@ -290,7 +290,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp0zd1ye5 + job_id: jgke2nyog job_status: Passed reference_device_info: name: SA7255P ADP @@ -299,13 +299,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:49:04Z' + timestamp: '2024-12-12T01:18:10Z' - torchscript_onnx_tflite: - inference_time: 790.0 - throughput: 1265.8227848101267 + inference_time: 791.0 + throughput: 1264.2225031605562 estimated_peak_memory_range: min: 16384 - max: 277819968 + max: 182584048 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,14 +313,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j5we8djj5 + job_id: jgdxd9olp job_status: Passed torchscript_onnx_qnn: - inference_time: 
952.0 - throughput: 1050.420168067227 + inference_time: 950.0 + throughput: 1052.6315789473683 estimated_peak_memory_range: - min: 180224 - max: 1656080 + min: 176128 + max: 1659096 primary_compute_unit: NPU precision: int8 layer_info: @@ -328,7 +328,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp8q63o8p + job_id: j5q6lk2mp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -337,13 +337,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:49:06Z' + timestamp: '2024-12-12T01:18:12Z' - torchscript_onnx_tflite: - inference_time: 1241.0 - throughput: 805.8017727639001 + inference_time: 1242.0 + throughput: 805.1529790660226 estimated_peak_memory_range: - min: 12288 - max: 14848416 + min: 16384 + max: 15027104 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,14 +351,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jg9lk36vg + job_id: j57yewdr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1874.0 - throughput: 533.6179295624333 + inference_time: 1518.0 + throughput: 658.7615283267457 estimated_peak_memory_range: min: 0 - max: 5926112 + max: 5663424 primary_compute_unit: NPU precision: int8 layer_info: @@ -366,7 +366,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgkeolzog + job_id: jglvyzkl5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -375,13 +375,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:49:07Z' + timestamp: '2024-12-12T01:18:14Z' - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 788.0 + throughput: 1269.0355329949239 estimated_peak_memory_range: min: 20480 - max: 45348776 + max: 244839840 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,14 +389,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp147drlp + job_id: jp4lyowl5 
job_status: Passed torchscript_onnx_qnn: - inference_time: 951.0 - throughput: 1051.5247108307046 + inference_time: 953.0 + throughput: 1049.3179433368311 estimated_peak_memory_range: - min: 20480 - max: 1265872 + min: 184320 + max: 1357992 primary_compute_unit: NPU precision: int8 layer_info: @@ -404,7 +404,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5q6z78mp + job_id: j56y8j17p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -413,13 +413,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:49:09Z' + timestamp: '2024-12-12T01:18:16Z' - torchscript_onnx_tflite: - inference_time: 1081.0 - throughput: 925.0693802035153 + inference_time: 1074.0 + throughput: 931.0986964618249 estimated_peak_memory_range: min: 16384 - max: 16914336 + max: 20722752 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,14 +427,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdx8rjlp + job_id: jpxklj195 job_status: Passed torchscript_onnx_qnn: - inference_time: 1462.0 - throughput: 683.9945280437756 + inference_time: 1433.0 + throughput: 697.8367062107467 estimated_peak_memory_range: - min: 0 - max: 5810560 + min: 118784 + max: 6003872 primary_compute_unit: NPU precision: int8 layer_info: @@ -442,7 +442,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jglvo0nl5 + job_id: jp3jz3mzg job_status: Passed reference_device_info: name: SA8775P ADP @@ -451,13 +451,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:49:11Z' + timestamp: '2024-12-12T01:18:17Z' - torchscript_onnx_tflite: - inference_time: 906.0 - throughput: 1103.7527593818984 + inference_time: 914.0 + throughput: 1094.0919037199126 estimated_peak_memory_range: - min: 16384 - max: 23785088 + min: 438272 + max: 28460880 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,14 +465,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 82 - job_id: j57ykjzr5 + job_id: j5mn02zqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1130.0 - throughput: 884.9557522123894 + inference_time: 1129.0 + throughput: 885.7395925597874 estimated_peak_memory_range: min: 167936 - max: 18652320 + max: 19487600 primary_compute_unit: NPU precision: int8 layer_info: @@ -480,7 +480,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j56yr367p + job_id: jgo2l0vdp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -489,13 +489,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:49:13Z' + timestamp: '2024-12-12T01:18:19Z' - torchscript_onnx_qnn: - inference_time: 1419.0 - throughput: 704.7216349541931 + inference_time: 1009.0 + throughput: 991.0802775024777 estimated_peak_memory_range: - min: 495616 - max: 495616 + min: 479232 + max: 479232 primary_compute_unit: NPU precision: int8 layer_info: @@ -503,7 +503,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpy1q4e4p + job_id: jp8qe978p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -512,4 +512,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:49:20Z' + timestamp: '2024-12-12T01:18:08Z' diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 1b5be09c..6f902512 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ResNeXt101 universal_assets: - torchscript_onnx_tflite: mno3540vn - torchscript_onnx: mqy3d597m + torchscript_onnx_tflite: mqyv3rv9q + torchscript_onnx: mn0jxyj9m performance_metrics: - torchscript_onnx_tflite: - inference_time: 6511.0 - throughput: 153.5862386730149 + inference_time: 6455.0 + throughput: 154.91866769945779 estimated_peak_memory_range: - min: 20480 - max: 
40275784 + min: 24576 + max: 36693576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jprvo6q0g + job_id: jp3jzrjzg job_status: Passed torchscript_onnx_qnn: - inference_time: 6630.0 - throughput: 150.82956259426848 + inference_time: 6625.0 + throughput: 150.9433962264151 estimated_peak_memory_range: min: 16384 - max: 33793160 + max: 38395096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgo2oz0qp + job_id: j57ye82r5 job_status: Passed torchscript_onnx: - inference_time: 7105.0 - throughput: 140.74595355383534 + inference_time: 7141.0 + throughput: 140.0364094664613 estimated_peak_memory_range: - min: 12288 - max: 203272864 + min: 634880 + max: 4263272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jpxk39235 + job_id: j5q6ld0mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:36:33Z' + timestamp: '2024-12-11T23:01:57Z' - torchscript_onnx_tflite: - inference_time: 4666.0 - throughput: 214.3163309044149 + inference_time: 4668.0 + throughput: 214.22450728363324 estimated_peak_memory_range: min: 16384 - max: 91786464 + max: 94359024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp2k4x6rp + job_id: jgo2l92dp job_status: Passed torchscript_onnx_qnn: - inference_time: 4757.0 - throughput: 210.21652301870927 + inference_time: 4704.0 + throughput: 212.58503401360545 estimated_peak_memory_range: min: 618496 - max: 93430400 + max: 93467968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 245 - job_id: jpv6eqxk5 + job_id: jp4ly2nl5 job_status: Passed torchscript_onnx: - inference_time: 5130.0 - throughput: 194.9317738791423 + inference_time: 4995.0 + throughput: 200.20020020020021 estimated_peak_memory_range: - min: 0 - max: 391459488 + min: 667648 + max: 394593696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: j5mnoeydp + job_id: jglvyq4l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:36:34Z' + timestamp: '2024-12-11T23:01:58Z' - torchscript_onnx_tflite: - inference_time: 4616.0 - throughput: 216.63778162911612 + inference_time: 4582.0 + throughput: 218.2453077258839 estimated_peak_memory_range: min: 12288 - max: 101740320 + max: 102777648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jpy1qzw8p + job_id: jpv6ln6m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4691.0 - throughput: 213.1741632914091 + inference_time: 4649.0 + throughput: 215.10002151000216 estimated_peak_memory_range: min: 0 - max: 100116016 + max: 101467232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgjvod4vg + job_id: jpxklz995 job_status: Passed torchscript_onnx: - inference_time: 5091.0 - throughput: 196.42506383814575 + inference_time: 4506.0 + throughput: 221.92632046160674 estimated_peak_memory_range: min: 0 - max: 168664384 + max: 168398736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jgn6o08k5 + job_id: j56y8027p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: 
os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:36:35Z' + timestamp: '2024-12-11T23:01:59Z' - torchscript_onnx_tflite: - inference_time: 6443.0 - throughput: 155.20720161415488 + inference_time: 6502.0 + throughput: 153.79883112888342 estimated_peak_memory_range: - min: 16384 - max: 36041488 + min: 40960 + max: 35893960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp0zd4q95 + job_id: jgjvr8v8g job_status: Passed torchscript_onnx_qnn: - inference_time: 6769.0 - throughput: 147.73230905599056 + inference_time: 6763.0 + throughput: 147.8633742422002 estimated_peak_memory_range: - min: 643072 - max: 1972704 + min: 638976 + max: 1921872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jped8o3o5 + job_id: j5mn0leqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:36:23Z' + timestamp: '2024-12-11T23:01:48Z' - torchscript_onnx_tflite: - inference_time: 272498.0 - throughput: 3.669751704599667 + inference_time: 272495.0 + throughput: 3.6697921062771792 estimated_peak_memory_range: - min: 24576 - max: 100300608 + min: 77824 + max: 101406880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp8q629kp + job_id: jped7nd05 job_status: Passed torchscript_onnx_qnn: - inference_time: 273074.0 - throughput: 3.662011029977222 + inference_time: 272992.0 + throughput: 3.6631110069159534 estimated_peak_memory_range: - min: 704512 - max: 6688720 + min: 651264 + max: 11161216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: 
j5we8wn35 + job_id: jprvl76eg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:36:26Z' + timestamp: '2024-12-11T23:01:50Z' - torchscript_onnx_tflite: - inference_time: 6431.0 - throughput: 155.49681231534754 + inference_time: 6474.0 + throughput: 154.46400988569664 estimated_peak_memory_range: - min: 20480 - max: 35494272 + min: 16384 + max: 35813088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jgkeovnwg + job_id: jgz3l0365 job_status: Passed torchscript_onnx_qnn: - inference_time: 6840.0 - throughput: 146.19883040935673 + inference_time: 6905.0 + throughput: 144.82259232440262 estimated_peak_memory_range: - min: 684032 - max: 2021224 + min: 32768 + max: 1169616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jg9lk0ewg + job_id: jp2krzxmp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:36:27Z' + timestamp: '2024-12-11T23:01:52Z' - torchscript_onnx_tflite: - inference_time: 10716.0 - throughput: 93.3184023889511 + inference_time: 10702.0 + throughput: 93.44047841524949 estimated_peak_memory_range: - min: 20480 - max: 52313840 + min: 49152 + max: 53367424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j5q6z0knp + job_id: j5welrej5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10958.0 - throughput: 91.25752874612155 + inference_time: 10882.0 + throughput: 91.89487226612755 estimated_peak_memory_range: - min: 663552 - max: 6640288 + min: 638976 + max: 6847248 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp1472x8p + job_id: jpy1oyz4p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:36:28Z' + timestamp: '2024-12-11T23:01:53Z' - torchscript_onnx_tflite: - inference_time: 6499.0 - throughput: 153.86982612709647 + inference_time: 6454.0 + throughput: 154.94267121165169 estimated_peak_memory_range: - min: 24576 - max: 36053152 + min: 20480 + max: 36179440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jglvo4zj5 + job_id: jg9lzqlvg job_status: Passed torchscript_onnx_qnn: - inference_time: 6811.0 - throughput: 146.82131845543972 + inference_time: 6864.0 + throughput: 145.6876456876457 estimated_peak_memory_range: - min: 638976 - max: 1926288 + min: 663552 + max: 2005528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgdx8nlrp + job_id: jp0zmx4e5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:36:29Z' - - torchscript_onnx_qnn: - inference_time: 12373.0 - throughput: 80.82114281095934 + timestamp: '2024-12-11T23:01:54Z' + - torchscript_onnx_tflite: + inference_time: 12407.0 + throughput: 80.59966148142178 estimated_peak_memory_range: - min: 0 - max: 5934464 + min: 40960 + max: 101382384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 147 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 147 + job_id: jp14nm4lp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 12414.0 + throughput: 80.55421298533913 + estimated_peak_memory_range: + min: 647168 + max: 11406096 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j57yk23v5 + job_id: jp8qek28p job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:36:31Z' + timestamp: '2024-12-11T23:01:55Z' - torchscript_onnx_tflite: - inference_time: 9220.0 - throughput: 108.45986984815619 + inference_time: 9109.0 + throughput: 109.78153474585575 estimated_peak_memory_range: - min: 69632 - max: 59402352 + min: 45056 + max: 60116224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jp3jxn33g + job_id: jgdxdmxlp job_status: Passed torchscript_onnx_qnn: - inference_time: 9234.0 - throughput: 108.29542993285683 + inference_time: 9486.0 + throughput: 105.41851149061775 estimated_peak_memory_range: - min: 638976 - max: 59945920 + min: 0 + max: 58774400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jp4lmn085 + job_id: jgke2kvog job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:36:32Z' + timestamp: '2024-12-11T23:01:56Z' - torchscript_onnx_qnn: - inference_time: 6844.0 - throughput: 146.11338398597312 + inference_time: 6867.0 + throughput: 145.623998835008 estimated_peak_memory_range: min: 602112 max: 602112 @@ -470,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jgz382ko5 + job_id: jgn6zw0m5 job_status: Passed torchscript_onnx: - inference_time: 6810.0 - throughput: 146.84287812041117 + inference_time: 6762.0 + throughput: 147.88524105294292 estimated_peak_memory_range: - min: 181125120 - max: 181125120 + min: 182530048 + max: 
182530048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jprvo6j0g + job_id: jp3jzrnzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:36:36Z' + timestamp: '2024-12-11T23:02:00Z' diff --git a/qai_hub_models/models/resnext101_quantized/evaluate.py b/qai_hub_models/models/resnext101_quantized/evaluate.py index 26fe838f..a79fb983 100644 --- a/qai_hub_models/models/resnext101_quantized/evaluate.py +++ b/qai_hub_models/models/resnext101_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index b9b636c8..c841b76e 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index bda000e8..b80c3a74 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: ResNeXt101Quantized universal_assets: - torchscript_onnx_tflite: mng1drjen + torchscript_onnx_tflite: mnz1vzl6q performance_metrics: - torchscript_onnx_tflite: - inference_time: 2844.0 - throughput: 351.6174402250352 + 
inference_time: 2793.0 + throughput: 358.03795202291445 estimated_peak_memory_range: min: 24576 - max: 25785840 + max: 34623640 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j5mnow7qp + job_id: jp8qe9mzp job_status: Passed torchscript_onnx_qnn: - inference_time: 3096.0 - throughput: 322.99741602067184 + inference_time: 3039.0 + throughput: 329.0556103981573 estimated_peak_memory_range: min: 12288 - max: 30869336 + max: 31215392 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpv6e18m5 + job_id: jp14nlwkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:48:03Z' + timestamp: '2024-12-12T01:16:47Z' - torchscript_onnx_tflite: - inference_time: 2070.0 - throughput: 483.09178743961354 + inference_time: 2062.0 + throughput: 484.96605237633366 estimated_peak_memory_range: min: 16384 - max: 97398496 + max: 95015904 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgn6o94m5 + job_id: jgke2nqyg job_status: Passed torchscript_onnx_qnn: - inference_time: 2325.0 - throughput: 430.10752688172045 + inference_time: 2338.0 + throughput: 427.71599657827204 estimated_peak_memory_range: min: 12288 - max: 99418672 + max: 97884848 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgjvo098g + job_id: jgdxd9qkp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:48:04Z' + timestamp: '2024-12-12T01:16:49Z' - torchscript_onnx_tflite: - 
inference_time: 2053.0 - throughput: 487.0920603994155 + inference_time: 2052.0 + throughput: 487.32943469785573 estimated_peak_memory_range: - min: 32768 - max: 92274528 + min: 12288 + max: 92670672 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jprvo4reg + job_id: jglvyz2e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2057.0 - throughput: 486.1448711716091 + inference_time: 2281.0 + throughput: 438.4042086804033 estimated_peak_memory_range: - min: 0 - max: 95924928 + min: 57344 + max: 96981728 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jped8rq05 + job_id: j5welv3j5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:48:06Z' + timestamp: '2024-12-12T01:16:51Z' - torchscript_onnx_tflite: - inference_time: 10338.0 - throughput: 96.73050880247631 + inference_time: 10346.0 + throughput: 96.65571235260003 estimated_peak_memory_range: min: 12288 - max: 103348528 + max: 105339104 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jpy1q4l4p + job_id: j56y8jzvp job_status: Passed torchscript_onnx_qnn: - inference_time: 14586.0 - throughput: 68.55889208830385 + inference_time: 15672.0 + throughput: 63.808065339458906 estimated_peak_memory_range: - min: 184320 - max: 8175840 + min: 131072 + max: 12113856 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgz38x665 + job_id: jg9lz1yvg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 
Proxy - timestamp: '2024-11-26T03:47:46Z' + timestamp: '2024-12-12T01:16:52Z' - torchscript_onnx_tflite: - inference_time: 129516.0 - throughput: 7.72105376941845 + inference_time: 107475.0 + throughput: 9.304489416143289 estimated_peak_memory_range: - min: 12288 - max: 549510368 + min: 32768 + max: 546978456 primary_compute_unit: GPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 150 - job_id: jp0zd1we5 + job_id: jp3jz31xg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:47:26Z' + timestamp: '2024-12-12T01:16:32Z' - torchscript_onnx_tflite: - inference_time: 2767.0 - throughput: 361.4022406938923 + inference_time: 2833.0 + throughput: 352.98270384751146 estimated_peak_memory_range: min: 16384 - max: 27648072 + max: 34775776 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp8q63n8p + job_id: jgo2l0n4p job_status: Passed torchscript_onnx_qnn: - inference_time: 2934.0 - throughput: 340.83162917518746 + inference_time: 2961.0 + throughput: 337.7237419790611 estimated_peak_memory_range: - min: 188416 - max: 1397608 + min: 176128 + max: 1314232 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j5we8dkj5 + job_id: jp14nlwlp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:47:48Z' + timestamp: '2024-12-12T01:16:54Z' - torchscript_onnx_tflite: - inference_time: 34256.0 - throughput: 29.19196637085474 + inference_time: 34246.0 + throughput: 29.200490568241545 estimated_peak_memory_range: - min: 36864 - max: 89579248 + min: 24576 + max: 90490800 
primary_compute_unit: NPU precision: int8 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgkeol1og + job_id: jpv6lor75 job_status: Passed torchscript_onnx_qnn: - inference_time: 34822.0 - throughput: 28.717477456780195 + inference_time: 34773.0 + throughput: 28.757944382135566 estimated_peak_memory_range: - min: 143360 - max: 5735728 + min: 155648 + max: 10259392 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp147d9lp + job_id: j57yewxr5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:47:52Z' + timestamp: '2024-12-12T01:16:58Z' - torchscript_onnx_tflite: - inference_time: 2925.0 - throughput: 341.88034188034186 + inference_time: 2750.0 + throughput: 363.6363636363636 estimated_peak_memory_range: - min: 20480 - max: 27096216 + min: 16384 + max: 29240848 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j5q6z7nmp + job_id: jgjvrm27g job_status: Passed torchscript_onnx_qnn: - inference_time: 3019.0 - throughput: 331.2355084465055 + inference_time: 3117.0 + throughput: 320.82130253448827 estimated_peak_memory_range: - min: 184320 - max: 1448416 + min: 172032 + max: 1365440 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jgdx8rklp + job_id: jp4lyovl5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:47:53Z' + timestamp: '2024-12-12T01:16:59Z' - torchscript_onnx_tflite: - inference_time: 4060.0 - throughput: 246.30541871921181 + inference_time: 3990.0 + throughput: 
250.6265664160401 estimated_peak_memory_range: min: 16384 - max: 90517936 + max: 91633984 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jglvo0dl5 + job_id: jped71w75 job_status: Passed torchscript_onnx_qnn: - inference_time: 4266.0 - throughput: 234.4116268166901 + inference_time: 4826.0 + throughput: 207.21094073767094 estimated_peak_memory_range: min: 196608 - max: 5958240 + max: 6098992 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j57ykjqr5 + job_id: jpxkljy95 job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:47:55Z' + timestamp: '2024-12-12T01:17:01Z' - torchscript_onnx_tflite: - inference_time: 2779.0 - throughput: 359.84166966534724 + inference_time: 2947.0 + throughput: 339.328130302002 estimated_peak_memory_range: - min: 16384 - max: 36602096 + min: 0 + max: 34656240 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: j56yr3x7p + job_id: jgz3l9jz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3041.0 - throughput: 328.83919763235775 + inference_time: 3018.0 + throughput: 331.3452617627568 estimated_peak_memory_range: min: 184320 - max: 1645288 + max: 1833008 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jp4lmxzl5 + job_id: jgn6zy3m5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:47:57Z' + timestamp: '2024-12-12T01:17:03Z' - torchscript_onnx_tflite: - inference_time: 3738.0 - throughput: 267.5227394328518 
+ inference_time: 3751.0 + throughput: 266.5955745134631 estimated_peak_memory_range: min: 16384 - max: 89771248 + max: 90448576 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jp3jx4dzg + job_id: j5welv3z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4215.0 - throughput: 237.2479240806643 + inference_time: 4229.0 + throughput: 236.46252069047057 estimated_peak_memory_range: - min: 167936 - max: 5927360 + min: 163840 + max: 6021312 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jpxk37w95 + job_id: jprvlqeeg job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:47:59Z' + timestamp: '2024-12-12T01:17:05Z' - torchscript_onnx_tflite: - inference_time: 3304.0 - throughput: 302.6634382566586 + inference_time: 3236.0 + throughput: 309.02348578491967 estimated_peak_memory_range: min: 16384 - max: 101805424 + max: 103919360 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 150 - job_id: jgo2o1xdp + job_id: jg9lz1yqg job_status: Passed torchscript_onnx_qnn: - inference_time: 3517.0 - throughput: 284.3332385555872 + inference_time: 3636.0 + throughput: 275.027502750275 estimated_peak_memory_range: - min: 12288 - max: 101800816 + min: 0 + max: 104606880 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: j5mnowjqp + job_id: jp2kr6lmp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:48:01Z' + timestamp: '2024-12-12T01:17:07Z' - torchscript_onnx_qnn: 
- inference_time: 3093.0 - throughput: 323.31070158422244 + inference_time: 3107.0 + throughput: 321.853878339234 estimated_peak_memory_range: - min: 159744 - max: 159744 + min: 180224 + max: 180224 primary_compute_unit: NPU precision: int8 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 246 - job_id: jg9lk3rvg + job_id: jgdxd9qlp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:48:08Z' + timestamp: '2024-12-12T01:16:56Z' diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index 0d09eccf..3ad76e0c 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: ResNeXt50 universal_assets: - torchscript_onnx_tflite: mn7lp52jq - torchscript_onnx: mq3e23g3m + torchscript_onnx_tflite: mq9lp0x2q + torchscript_onnx: mqv64y0em performance_metrics: - torchscript_onnx_tflite: - inference_time: 2524.0 - throughput: 396.19651347068145 + inference_time: 2505.0 + throughput: 399.2015968063872 estimated_peak_memory_range: - min: 20480 - max: 199536008 + min: 16384 + max: 199173392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpxk39v15 + job_id: jpy1oy34p job_status: Passed torchscript_onnx_qnn: - inference_time: 2585.0 - throughput: 386.84719535783364 + inference_time: 2583.0 + throughput: 387.14672861014327 estimated_peak_memory_range: - min: 618496 - max: 136487176 + min: 626688 + max: 136612744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jglvo4n85 + job_id: jped7nm05 job_status: Passed torchscript_onnx: - inference_time: 2761.0 - throughput: 362.18761318362914 + 
inference_time: 2736.0 + throughput: 365.4970760233918 estimated_peak_memory_range: - min: 618496 - max: 2427344 + min: 12288 + max: 59969296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5we8wv35 + job_id: jprvl7veg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:35:42Z' + timestamp: '2024-12-11T23:01:06Z' - torchscript_onnx_tflite: - inference_time: 1777.0 - throughput: 562.7462014631401 + inference_time: 1749.0 + throughput: 571.7552887364208 estimated_peak_memory_range: min: 16384 - max: 36204976 + max: 40512272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5mnoerwp + job_id: jp0zmx0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1833.0 - throughput: 545.5537370430987 + inference_time: 1834.0 + throughput: 545.2562704471102 estimated_peak_memory_range: - min: 0 - max: 36134688 + min: 618496 + max: 39034448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j56yr260p + job_id: jgz3l0d65 job_status: Passed torchscript_onnx: - inference_time: 1999.0 - throughput: 500.25012506253125 + inference_time: 2011.0 + throughput: 497.2650422675286 estimated_peak_memory_range: min: 0 - max: 186500272 + max: 189005152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jg9lk01wg + job_id: jp2krzkmp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:35:43Z' + timestamp: '2024-12-11T23:01:07Z' - 
torchscript_onnx_tflite: - inference_time: 1678.0 - throughput: 595.9475566150179 + inference_time: 1679.0 + throughput: 595.5926146515783 estimated_peak_memory_range: min: 12288 - max: 63037744 + max: 41029136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgdxr36zp + job_id: jp8qeky8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1738.0 - throughput: 575.3739930955121 + inference_time: 1788.0 + throughput: 559.2841163310962 estimated_peak_memory_range: min: 0 - max: 38410736 + max: 40053536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp3jxnklg + job_id: j5welr6j5 job_status: Passed torchscript_onnx: - inference_time: 1714.0 - throughput: 583.4305717619603 + inference_time: 1682.0 + throughput: 594.5303210463734 estimated_peak_memory_range: min: 0 - max: 65495344 + max: 66703904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp1472l8p + job_id: jpy1oy14p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:35:44Z' + timestamp: '2024-12-11T23:01:08Z' - torchscript_onnx_tflite: - inference_time: 2505.0 - throughput: 399.2015968063872 + inference_time: 2502.0 + throughput: 399.68025579536373 estimated_peak_memory_range: - min: 16384 - max: 178735584 + min: 28672 + max: 189376160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jprvo6k9g + job_id: j5q6ldqmp job_status: Passed torchscript_onnx_qnn: - inference_time: 2515.0 - throughput: 397.61431411530816 + inference_time: 2520.0 + throughput: 396.8253968253968 
estimated_peak_memory_range: - min: 638976 - max: 1868584 + min: 634880 + max: 2268120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2ozyxp + job_id: jg9lzqnvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:35:32Z' + timestamp: '2024-12-11T23:00:56Z' - torchscript_onnx_tflite: - inference_time: 85599.0 - throughput: 11.682379467049849 + inference_time: 85640.0 + throughput: 11.676786548341896 estimated_peak_memory_range: - min: 36864 - max: 38200848 + min: 20480 + max: 38428912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp2k4x84p + job_id: jglvyqml5 job_status: Passed torchscript_onnx_qnn: - inference_time: 85886.0 - throughput: 11.643341173183057 + inference_time: 85873.0 + throughput: 11.645103816100521 estimated_peak_memory_range: - min: 733184 - max: 7979360 + min: 622592 + max: 11024528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jped8o915 + job_id: jgdxdm1lp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:35:34Z' + timestamp: '2024-12-11T23:00:59Z' - torchscript_onnx_tflite: - inference_time: 2478.0 - throughput: 403.5512510088781 + inference_time: 2507.0 + throughput: 398.8831272437176 estimated_peak_memory_range: - min: 24576 - max: 199440000 + min: 20480 + max: 189310000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpy1qze7p + job_id: j56y8047p job_status: Passed torchscript_onnx_qnn: - 
inference_time: 2492.0 - throughput: 401.2841091492777 + inference_time: 2596.0 + throughput: 385.2080123266564 estimated_peak_memory_range: - min: 626688 - max: 2235416 + min: 634880 + max: 1766864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgz382ek5 + job_id: j57ye8yr5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:35:36Z' + timestamp: '2024-12-11T23:01:00Z' - torchscript_onnx_tflite: - inference_time: 3970.0 - throughput: 251.88916876574308 + inference_time: 3983.0 + throughput: 251.06703489831784 estimated_peak_memory_range: min: 16384 - max: 23912592 + max: 25455520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp0zd4y65 + job_id: jp3jzr0zg job_status: Passed torchscript_onnx_qnn: - inference_time: 4187.0 - throughput: 238.83448770002389 + inference_time: 4719.0 + throughput: 211.90930281839374 estimated_peak_memory_range: - min: 0 - max: 6018800 + min: 618496 + max: 6662512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we8wv65 + job_id: jp4ly2ll5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:35:37Z' + timestamp: '2024-12-11T23:01:02Z' - torchscript_onnx_tflite: - inference_time: 2484.0 - throughput: 402.5764895330113 + inference_time: 2509.0 + throughput: 398.5651654045436 estimated_peak_memory_range: - min: 20480 - max: 210184584 + min: 16384 + max: 199319136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: 
jp8q62oxp + job_id: jgo2l96dp job_status: Passed torchscript_onnx_qnn: - inference_time: 2544.0 - throughput: 393.0817610062893 + inference_time: 2487.0 + throughput: 402.09087253719343 estimated_peak_memory_range: - min: 638976 - max: 1999288 + min: 626688 + max: 2184296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lk01lg + job_id: jpxklzk95 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:35:38Z' + timestamp: '2024-12-11T23:01:03Z' - torchscript_onnx_tflite: - inference_time: 4605.0 - throughput: 217.15526601520088 + inference_time: 4600.0 + throughput: 217.3913043478261 estimated_peak_memory_range: min: 16384 - max: 38091232 + max: 39750336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgkeovz2g + job_id: jpv6lnkm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4690.0 - throughput: 213.21961620469082 + inference_time: 4678.0 + throughput: 213.76656690893543 estimated_peak_memory_range: min: 622592 - max: 6306528 + max: 6620032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp1472l2p + job_id: j5mn0lnqp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:35:39Z' + timestamp: '2024-12-11T23:01:04Z' - torchscript_onnx_tflite: - inference_time: 3314.0 - throughput: 301.75015087507546 + inference_time: 3260.0 + throughput: 306.7484662576687 estimated_peak_memory_range: min: 16384 - max: 30219600 + max: 29415952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 79 - job_id: j5q6z084p + job_id: jgjvr8n8g job_status: Passed torchscript_onnx_qnn: - inference_time: 3364.0 - throughput: 297.2651605231867 + inference_time: 3386.0 + throughput: 295.33372711163616 estimated_peak_memory_range: min: 618496 - max: 25244592 + max: 31750880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdx8n9ep + job_id: jgn6zw6m5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:35:41Z' + timestamp: '2024-12-11T23:01:05Z' - torchscript_onnx_qnn: - inference_time: 2676.0 - throughput: 373.69207772795215 + inference_time: 2680.0 + throughput: 373.13432835820896 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv6eq3j5 + job_id: jp14nmzlp job_status: Passed torchscript_onnx: - inference_time: 2648.0 - throughput: 377.64350453172204 + inference_time: 2653.0 + throughput: 376.9317753486619 estimated_peak_memory_range: - min: 54325248 - max: 54325248 + min: 54579200 + max: 54579200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgdx8n9rp + job_id: jp0zmxze5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:35:45Z' + timestamp: '2024-12-11T23:01:09Z' diff --git a/qai_hub_models/models/resnext50_quantized/evaluate.py b/qai_hub_models/models/resnext50_quantized/evaluate.py index 87f75d68..f5e31235 100644 --- a/qai_hub_models/models/resnext50_quantized/evaluate.py +++ b/qai_hub_models/models/resnext50_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): 
model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 7d00c9a3..a13dc9a9 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/resnext50_quantized/info.yaml b/qai_hub_models/models/resnext50_quantized/info.yaml index 2bf3f2d6..2caba593 100644 --- a/qai_hub_models/models/resnext50_quantized/info.yaml +++ b/qai_hub_models/models/resnext50_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1611.05431 research_paper_title: Aggregated Residual Transformations for Deep Neural Networks license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index 2d763bd7..08f82eb5 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: ResNeXt50Quantized universal_assets: - torchscript_onnx_tflite: mq3e1jl3m + torchscript_onnx_tflite: mm5edo29m performance_metrics: - torchscript_onnx_tflite: - inference_time: 921.0 - throughput: 1085.7763300760043 + inference_time: 911.0 + throughput: 1097.694840834248 estimated_peak_memory_range: min: 16384 - max: 65241936 + max: 56607480 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 
+65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jg9lk3qqg + job_id: jpxklj9j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1180.0 - throughput: 847.457627118644 + inference_time: 1178.0 + throughput: 848.8964346349745 estimated_peak_memory_range: - min: 16384 - max: 65626064 + min: 36864 + max: 12974584 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp8q63dzp + job_id: jp3jz3vxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:46:44Z' + timestamp: '2024-12-12T01:15:29Z' - torchscript_onnx_tflite: - inference_time: 687.0 - throughput: 1455.604075691412 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: min: 12288 - max: 36813424 + max: 39562992 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp147dmkp + job_id: j5mn02eyp job_status: Passed torchscript_onnx_qnn: - inference_time: 890.0 - throughput: 1123.5955056179776 + inference_time: 882.0 + throughput: 1133.7868480725624 estimated_peak_memory_range: min: 167936 - max: 38796144 + max: 40906016 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgkeolwyg + job_id: jgo2l0k4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:46:46Z' + timestamp: '2024-12-12T01:15:31Z' - torchscript_onnx_tflite: inference_time: 647.0 throughput: 1545.595054095827 estimated_peak_memory_range: - min: 8192 - max: 31764496 + min: 12288 + max: 31602864 primary_compute_unit: NPU precision: int8 
layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdx8rmkp + job_id: jgn6zylv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 854.0 - throughput: 1170.96018735363 + inference_time: 851.0 + throughput: 1175.0881316098707 estimated_peak_memory_range: min: 0 - max: 34458896 + max: 37764864 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5q6z7x7p + job_id: jpv6lo075 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:46:48Z' + timestamp: '2024-12-12T01:15:33Z' - torchscript_onnx_tflite: - inference_time: 2903.0 - throughput: 344.47123665173956 + inference_time: 2963.0 + throughput: 337.4957813027337 estimated_peak_memory_range: min: 12288 - max: 36629824 + max: 38867712 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j57ykj8q5 + job_id: jprvlq8vg job_status: Passed torchscript_onnx_qnn: - inference_time: 4703.0 - throughput: 212.63023601956198 + inference_time: 4588.0 + throughput: 217.9598953792502 estimated_peak_memory_range: - min: 335872 - max: 8609728 + min: 163840 + max: 7826992 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jglvo09e5 + job_id: jgjvrmz7g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:46:28Z' + timestamp: '2024-12-12T01:15:34Z' - torchscript_onnx_tflite: - inference_time: 66460.0 - throughput: 15.04664459825459 + inference_time: 51428.0 + throughput: 19.444660496227737 estimated_peak_memory_range: - min: 12288 - max: 
136582632 + min: 40960 + max: 90907288 primary_compute_unit: GPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 82 - job_id: jp4lmx2q5 + job_id: jp2kr60xp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:46:08Z' + timestamp: '2024-12-12T01:15:15Z' - torchscript_onnx_tflite: - inference_time: 909.0 - throughput: 1100.1100110011 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: - min: 16384 - max: 55111984 + min: 20480 + max: 65238776 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpxk37zj5 + job_id: jpy1owrrp job_status: Passed torchscript_onnx_qnn: - inference_time: 1147.0 - throughput: 871.8395815170009 + inference_time: 1139.0 + throughput: 877.9631255487269 estimated_peak_memory_range: - min: 204800 - max: 1557576 + min: 184320 + max: 1681600 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j56yr39vp + job_id: jped71e75 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:46:30Z' + timestamp: '2024-12-12T01:15:36Z' - torchscript_onnx_tflite: - inference_time: 10256.0 - throughput: 97.50390015600624 + inference_time: 10323.0 + throughput: 96.8710646130001 estimated_peak_memory_range: - min: 0 - max: 31498944 + min: 16384 + max: 31393632 primary_compute_unit: NPU precision: int8 layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j5mnowlyp + job_id: jp0zmq325 job_status: Passed torchscript_onnx_qnn: - inference_time: 10835.0 - throughput: 92.29349330872174 + 
inference_time: 10807.0 + throughput: 92.53261774775608 estimated_peak_memory_range: - min: 98304 - max: 5982480 + min: 94208 + max: 10464672 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgo2o174p + job_id: j5welv2z5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:46:34Z' + timestamp: '2024-12-12T01:15:40Z' - torchscript_onnx_tflite: - inference_time: 918.0 - throughput: 1089.3246187363834 + inference_time: 907.0 + throughput: 1102.5358324145534 estimated_peak_memory_range: - min: 16384 - max: 56978760 + min: 20480 + max: 67662712 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgn6o9dv5 + job_id: jp8qe90zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 1141.0 + throughput: 876.4241893076249 estimated_peak_memory_range: min: 172032 - max: 1548648 + max: 1603240 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpv6e1y75 + job_id: jg9lz1jqg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:46:35Z' + timestamp: '2024-12-12T01:15:41Z' - torchscript_onnx_tflite: - inference_time: 1466.0 - throughput: 682.1282401091405 + inference_time: 1458.0 + throughput: 685.8710562414266 estimated_peak_memory_range: min: 16384 - max: 29460960 + max: 34652048 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jprvo4mvg + job_id: jgke2n7yg job_status: Passed torchscript_onnx_qnn: - 
inference_time: 1815.0 - throughput: 550.9641873278237 + inference_time: 1818.0 + throughput: 550.05500550055 estimated_peak_memory_range: min: 0 - max: 5991056 + max: 6019200 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgjvo067g + job_id: jp14nlykp job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:46:37Z' + timestamp: '2024-12-12T01:15:43Z' - torchscript_onnx_tflite: - inference_time: 925.0 - throughput: 1081.081081081081 + inference_time: 920.0 + throughput: 1086.9565217391305 estimated_peak_memory_range: - min: 16384 - max: 65227776 + min: 32768 + max: 67197712 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp2k47qxp + job_id: j5q6lke7p job_status: Passed torchscript_onnx_qnn: - inference_time: 1144.0 - throughput: 874.1258741258741 + inference_time: 1143.0 + throughput: 874.8906386701663 estimated_peak_memory_range: min: 180224 - max: 1580032 + max: 1333600 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jped8r075 + job_id: jgdxd9ekp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:46:39Z' + timestamp: '2024-12-12T01:15:45Z' - torchscript_onnx_tflite: - inference_time: 1382.0 - throughput: 723.589001447178 + inference_time: 1374.0 + throughput: 727.802037845706 estimated_peak_memory_range: min: 16384 - max: 31747440 + max: 35797168 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jpy1q4krp + job_id: jglvyz6e5 job_status: 
Passed torchscript_onnx_qnn: - inference_time: 1853.0 - throughput: 539.6654074473827 + inference_time: 1861.0 + throughput: 537.345513164965 estimated_peak_memory_range: - min: 172032 - max: 5999296 + min: 163840 + max: 6165312 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgz38xqz5 + job_id: j57yew0q5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:46:41Z' + timestamp: '2024-12-12T01:15:47Z' - torchscript_onnx_tflite: - inference_time: 1101.0 - throughput: 908.2652134423251 + inference_time: 1072.0 + throughput: 932.8358208955224 estimated_peak_memory_range: - min: 20480 - max: 38693008 + min: 16384 + max: 41882224 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp0zd1825 + job_id: j56y8jevp job_status: Passed torchscript_onnx_qnn: - inference_time: 1377.0 - throughput: 726.2164124909223 + inference_time: 1428.0 + throughput: 700.2801120448179 estimated_peak_memory_range: min: 167936 - max: 38314656 + max: 42853680 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5we8d0z5 + job_id: jp4lyokq5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:46:43Z' + timestamp: '2024-12-12T01:15:48Z' - torchscript_onnx_qnn: - inference_time: 1273.0 - throughput: 785.5459544383347 + inference_time: 1264.0 + throughput: 791.1392405063291 estimated_peak_memory_range: - min: 425984 - max: 425984 + min: 458752 + max: 458752 primary_compute_unit: NPU precision: int8 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 127 - job_id: jp3jx4lxg + job_id: jgz3l9oz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:46:50Z' + timestamp: '2024-12-12T01:15:38Z' diff --git a/qai_hub_models/models/sam/app.py b/qai_hub_models/models/sam/app.py index de75dc4a..75981273 100644 --- a/qai_hub_models/models/sam/app.py +++ b/qai_hub_models/models/sam/app.py @@ -4,12 +4,24 @@ # --------------------------------------------------------------------- from __future__ import annotations -from typing import no_type_check +from enum import Enum +from typing import Callable import numpy as np import torch +from PIL.Image import Image -from qai_hub_models.models.sam.model import SAMQAIHMWrapper +from qai_hub_models.models.sam.model import ResizeLongestSide +from qai_hub_models.models.sam.model_patches import mask_postprocessing as upscale_masks +from qai_hub_models.utils.image_processing import ( + numpy_image_to_torch, + preprocess_PIL_image, +) + + +class SAMInputImageLayout(Enum): + RGB = 0 + BGR = 1 class SAMApp: @@ -17,117 +29,228 @@ class SAMApp: This class consists of light-weight "app code" that is required to perform end to end inference with Segment-Anything Model. 
The app uses 2 models: - * encoder (Given input image, emmits image embeddings to be used by decoder) - * decoder (Lightweight decoder, modified to accept and work with fix image size) - - For a given image input, the app will: - * Prepare: Runs encoder on given image and creates and caches embeddings - * Generate masks: Uses cached embeddings and generate masks for given points + * encoder (Given input image, emits image embeddings to be used by decoder) + * decoder (image embeddings --> predicted segmentation masks) """ - @no_type_check - def __init__(self, model: SAMQAIHMWrapper): - self.orig_img_size = None - self.image_embeddings = None - self.sam_qaihm_wrapper = model - self.sam_encoder = self.sam_qaihm_wrapper.get_sam_encoder() - self.sam_decoder = None - - def prepare(self, input_image: np.ndarray, single_mask_mode=True): + def __init__( + self, + encoder_input_img_size: int, + mask_threshold: float, + input_image_channel_layout: SAMInputImageLayout, + sam_encoder_splits: list[Callable[[torch.Tensor], torch.Tensor]], + sam_decoder: Callable[ + [torch.Tensor, torch.Tensor, torch.Tensor], + tuple[torch.Tensor, torch.Tensor], + ], + ): """ - Prepares App for segmentation of given input image - - Pre-processes input image - - Initiate Decoder with input image size - Parameters: - input_image: np.ndarry - Input RGB image loaded as numpy array. - single_mask_mode: bool - Set decoder to return single mask for given points. - """ - if self.sam_encoder is None: - self.sam_encoder = self.sam_qaihm_wrapper.get_sam_encoder() + encoder_input_img_size: int + The input dimension for images passed the encoder. Height and width are always the same, hence 1 value here. - preprocessed_image = self.sam_encoder.preprocess_input_image(input_image) - self.image_embeddings = self.sam_encoder(preprocessed_image) + mask_threshold: + Numerical threshold for a pixel in a mask to be considered a positive. 
- # Initialize decoder - self.orig_img_size = input_image.shape[:2] - self.sam_decoder = self.sam_qaihm_wrapper.get_sam_decoder( - self.orig_img_size, single_mask_mode - ) + input_image_channel_layout: SAMInputImageLayout + Channel layout ("RGB" or "BGR") expected by the encoder. - def reset(self): - """Reset app state""" - self.image_embeddings = None - self.orig_img_size = None - self.sam_decoder = None + sam_encoder_splits: + SAM encoder split into parts. Must match input & output of each model part generated by qai_hub_models.models.sam.model.SAMEncoderPart - def preprocess_point_coordinates( - self, input_coords: np.ndarray, image_shape: tuple[int, int] - ): - """Peprocesses Point coordinates to work with decoder""" - if self.sam_encoder is None: - raise RuntimeError("Encoder is not intialized. Please run `app.prepare`.") - return torch.Tensor( - self.sam_encoder.transforms.apply_coords(input_coords, image_shape) - ) + sam_decoder: + SAM decoder. Must match input and output of qai_hub_models.models.sam.model.SAMDecoder + Note that "mask_input" in forward() is not used by this app, so the decoder requires only 3 inputs rather than 4. + """ + self.sam_encoder_splits = sam_encoder_splits + self.sam_decoder = sam_decoder + self.mask_threshold = mask_threshold + self.encoder_input_img_size = encoder_input_img_size + self.input_img_size_transform = ResizeLongestSide(encoder_input_img_size) + self.input_image_channel_layout = input_image_channel_layout def predict(self, *args, **kwargs): - # See generate_mask_from_points. 
- return self.generate_mask_from_points(*args, **kwargs) + return self.predict_mask_from_points(*args, **kwargs) - def generate_mask_from_points( + def predict_mask_from_points( self, + pixel_values_or_image: torch.Tensor | np.ndarray | Image | list[Image], point_coords: torch.Tensor, point_labels: torch.Tensor, - ) -> torch.Tensor: + return_logits: bool = False, + ) -> tuple[torch.Tensor, torch.Tensor]: """ - Generate masks from given points + Predict segmentation masks from given points and image(s). Parameters: - point_coords: torch.Tensor of shape [k, 2] + pixel_values_or_image: torch.Tensor + PIL image + or + numpy array (N H W C x uint8) or (H W C x uint8) + channel layout consistent with self.input_image_channel_layout + or + pyTorch tensor (N C H W x int8, value range is [0, 255]) + channel layout consistent with self.input_image_channel_layout + + point_coords: torch.Tensor of shape [k, 2] or [b, k, 2] Point coordinates from input image for segmentation - point_labels: torch.Tensor of shape [k] + + point_labels: torch.Tensor of shape [k] or [b, k] Point Labels to select/de-select given point for segmentation e.g. Corresponding value is 1 if this point is to be included, otherwise 0 + + return_logits: bool + If False, returns boolean masks. If true, returns raw fp32 mask predictions. + Returns: - upscaled_masks: torch.Tensor of shape [1, k, ] - score: torch.Tensor of shape [1, k] - masks: torch.Tensor of shape [1, k, 256, 256] - Use this low resolution masks to further slice and upscale for resolutions that Decoder is not intialized to. + upscaled_masks: torch.Tensor of shape [b, k, ]. 
+ See parameter return_logits for type info + + scores: torch.Tensor of shape [b, k] + Mask confidence score Where, k = number of points + b = number of input images + """ + image_embeddings, input_images_original_size = self.predict_embeddings( + pixel_values_or_image + ) + return self.predict_mask_from_points_and_embeddings( + image_embeddings, + input_images_original_size, + point_coords, + point_labels, + return_logits, + ) + + def predict_embeddings( + self, pixel_values_or_image: torch.Tensor | np.ndarray | Image | list[Image] + ): """ - if self.sam_decoder is None: - raise RuntimeError( - "Please call `prepare_from_image` or `prepare` before calling `segment`." + Predict embeddings from given image. + + Parameters: + pixel_values_or_image: torch.Tensor + PIL image + or + numpy array (N H W C x uint8) or (H W C x uint8) + channel layout consistent with self.input_image_channel_layout + or + pyTorch tensor (N C H W x int8, value range is [0, 255]) + channel layout consistent with self.input_image_channel_layout + + Returns: + image_embeddings: torch.Tensor of shape [b, k, ] + image embeddings + + input_images_original_size: tuple[int, int] + Original size of input image (BEFORE reshape to fit encoder input size) + + Where, + k = number of points + b = number of input images + + Discussion: + It is faster to run this once on an image (compared to the entire encoder / decoder pipeline) + if masks will be predicted several times on the same image. 
+ """ + # Translate input to torch tensor of shape [N, C, H, W] + if isinstance(pixel_values_or_image, Image): + pixel_values_or_image = [pixel_values_or_image] + if isinstance(pixel_values_or_image, list): + NCHW_int8_torch_frames = torch.cat( + [ + preprocess_PIL_image( + x.convert(self.input_image_channel_layout.name), False + ) + for x in pixel_values_or_image + ] ) + elif isinstance(pixel_values_or_image, np.ndarray): + NCHW_int8_torch_frames = numpy_image_to_torch(pixel_values_or_image, False) + else: + NCHW_int8_torch_frames = pixel_values_or_image - # Prepare inputs for decoder - # Preprocess point co-ordinates for decoder - point_coords = self.preprocess_point_coordinates( - np.expand_dims(np.array(point_coords), 0), self.orig_img_size + # Resize input image to the encoder's desired input size. + input_images_original_size = ( + NCHW_int8_torch_frames.shape[2], + NCHW_int8_torch_frames.shape[3], + ) + input_images = self.input_img_size_transform.apply_image_torch( + NCHW_int8_torch_frames ) - point_labels = torch.Tensor(point_labels).unsqueeze(0) - mask_input = torch.zeros(self.sam_decoder.get_input_spec()["mask_input"][0]) - has_mask_input = torch.zeros((1,)) - upscaled_masks, scores, masks = self.sam_decoder( - self.image_embeddings, - point_coords, - point_labels, - mask_input, - has_mask_input, + # Normalize input to [0, 1] (must be done after resize) + input_images = input_images / 255.0 + + # Run encoder + image_embeddings = input_images + for encoder_part in self.sam_encoder_splits: + image_embeddings = encoder_part(image_embeddings) + + return image_embeddings, input_images_original_size + + def predict_mask_from_points_and_embeddings( + self, + image_embeddings: torch.Tensor, + input_images_original_size: tuple[int, int], + point_coords: torch.Tensor, + point_labels: torch.Tensor, + return_logits: bool = False, + ): + """ + Predict segmentation masks from given points and image embeddings. 
+ + Parameters: + image_embeddings: torch.Tensor of shape [b, k, ] + image embeddings + + input_images_original_size: tuple[int, int] + Original size of input image (BEFORE reshape to fit encoder input size) + + point_coords: torch.Tensor of shape [k, 2] or [b, k, 2] + Point coordinates from input image for segmentation. + + point_labels: torch.Tensor of shape [k] or [b, k] + Point Labels to select/de-select given point for segmentation + e.g. Corresponding value is 1 if this point is to be included, otherwise 0 + + return_logits: bool + If False, returns boolean masks. If true, returns raw fp32 mask predictions. + + Returns: + upscaled_masks: torch.Tensor of shape [b, k, ]. + See parameter return_logits for type info + + scores: torch.Tensor of shape [b, k] + Mask confidence score + + Where, + k = number of points + b = number of input images + """ + # Expand point_coords and point_labels to include a batch dimension, if necessary + if len(point_coords.shape) == 2: + point_coords = torch.unsqueeze(point_coords, 0) + if len(point_labels.shape) == 1: + point_labels = torch.unsqueeze(point_labels, 0) + + # Change point coordinates to map to the same pixel in the resized image. 
+ point_coords = self.input_img_size_transform.apply_coords_torch( + point_coords, input_images_original_size ) - # Reduce noise from generated masks - upscaled_masks = self.postprocess_mask(upscaled_masks) - masks = self.postprocess_mask(masks) + # Run decoder + masks, scores = self.sam_decoder(image_embeddings, point_coords, point_labels) + + # Upscale masks + upscaled_masks = upscale_masks( + masks, self.encoder_input_img_size, input_images_original_size + ) - return upscaled_masks, scores, masks + # Apply mask threshold + if not return_logits: + upscaled_masks = upscaled_masks > self.mask_threshold - def postprocess_mask(self, generated_mask: torch.Tensor): - """Drop masks lower than threshold to minimize noise""" - return generated_mask > self.sam_qaihm_wrapper.get_sam().mask_threshold + return upscaled_masks, scores diff --git a/qai_hub_models/models/sam/conftest.py b/qai_hub_models/models/sam/conftest.py index a6ac0d38..6f45413d 100644 --- a/qai_hub_models/models/sam/conftest.py +++ b/qai_hub_models/models/sam/conftest.py @@ -9,7 +9,6 @@ import pytest from qai_hub_models.models.sam import Model -from qai_hub_models.utils.testing import skip_clone_repo_check # Instantiate the model only once for all tests. 
@@ -22,7 +21,6 @@ def cached_from_pretrained(): from_pretrained = Model.from_pretrained sig = inspect.signature(from_pretrained) - @skip_clone_repo_check def _cached_from_pretrained(*args, **kwargs): cache_key = str(args) + str(kwargs) model = pretrained_cache.get(cache_key, None) diff --git a/qai_hub_models/models/sam/demo.py b/qai_hub_models/models/sam/demo.py index 44bf5086..ed3d99db 100644 --- a/qai_hub_models/models/sam/demo.py +++ b/qai_hub_models/models/sam/demo.py @@ -4,9 +4,9 @@ # --------------------------------------------------------------------- import argparse -import numpy as np +import torch -from qai_hub_models.models.sam.app import SAMApp +from qai_hub_models.models.sam.app import SAMApp, SAMInputImageLayout from qai_hub_models.models.sam.model import ( DEFAULT_MODEL_TYPE, MODEL_ASSET_VERSION, @@ -42,7 +42,7 @@ def main(is_test: bool = False): parser.add_argument( "--point-coordinates", type=str, - default="500,375;", + default="1342,1011;", help="Comma separated x and y coordinate. Multiple coordinate separated by `;`." " e.g. `x1,y1;x2,y2`. Default: `500,375;`", ) @@ -57,15 +57,17 @@ def main(is_test: bool = False): coordinates = list(filter(None, args.point_coordinates.split(";"))) # Load Application - app = SAMApp(SAMQAIHMWrapper.from_pretrained(model_type=args.model_type)) + wrapper = SAMQAIHMWrapper.from_pretrained(model_type=args.model_type) + app = SAMApp( + wrapper.sam.image_encoder.img_size, + wrapper.sam.mask_threshold, + SAMInputImageLayout[wrapper.sam.image_format], + wrapper.encoder_splits, + wrapper.decoder, + ) # Load Image image = load_image(args.image) - image_data = np.asarray(image) - - # Prepare SAM for decoder for given input image: - # i.e. 
run SAM encoder to generate and cache image embeddings - app.prepare(image_data, single_mask_mode=args.single_mask_mode) # Point segmentation using decoder print("\n** Performing point segmentation **\n") @@ -86,7 +88,9 @@ def main(is_test: bool = False): input_labels.append(1) # Generate masks with given input points - generated_mask, *_ = app.generate_mask_from_points(input_coords, input_labels) + generated_mask, *_ = app.predict_mask_from_points( + image, torch.Tensor(input_coords), torch.Tensor(input_labels) + ) if not is_test: show_image(image, generated_mask) diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index b0ad7e26..b7083e82 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -32,7 +32,15 @@ export_without_hub_access, ) -ALL_COMPONENTS = ["SAMDecoder", "SAMEncoder"] +ALL_COMPONENTS = [ + "SAMDecoder", + "SAMEncoderPart1", + "SAMEncoderPart2", + "SAMEncoderPart3", + "SAMEncoderPart4", + "SAMEncoderPart5", + "SAMEncoderPart6", +] def export_model( @@ -125,9 +133,19 @@ def export_model( model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) components_dict: dict[str, BaseModel] = {} if "SAMDecoder" in components: - components_dict["SAMDecoder"] = model.get_sam_decoder() # type: ignore - if "SAMEncoder" in components: - components_dict["SAMEncoder"] = model.get_sam_encoder() # type: ignore + components_dict["SAMDecoder"] = model.decoder # type: ignore + if "SAMEncoderPart1" in components: + components_dict["SAMEncoderPart1"] = model.encoder_splits[0] # type: ignore + if "SAMEncoderPart2" in components: + components_dict["SAMEncoderPart2"] = model.encoder_splits[1] # type: ignore + if "SAMEncoderPart3" in components: + components_dict["SAMEncoderPart3"] = model.encoder_splits[2] # type: ignore + if "SAMEncoderPart4" in components: + components_dict["SAMEncoderPart4"] = model.encoder_splits[3] # type: ignore + if "SAMEncoderPart5" in components: + 
components_dict["SAMEncoderPart5"] = model.encoder_splits[4] # type: ignore + if "SAMEncoderPart6" in components: + components_dict["SAMEncoderPart6"] = model.encoder_splits[5] # type: ignore compile_jobs: dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): @@ -248,11 +266,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") + device = "Snapdragon 8 Elite QRD" parser = export_parser( - model_cls=Model, - components=ALL_COMPONENTS, - supports_qnn=False, - supports_onnx=False, + model_cls=Model, default_export_device=device, components=ALL_COMPONENTS ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/model.py b/qai_hub_models/models/sam/model.py index 815f5aa2..383b3662 100644 --- a/qai_hub_models/models/sam/model.py +++ b/qai_hub_models/models/sam/model.py @@ -4,21 +4,31 @@ # --------------------------------------------------------------------- from __future__ import annotations -from collections.abc import Callable +import functools from pathlib import Path +from typing import cast import numpy as np import torch +from qai_hub_models.models.sam.model_patches import ( + MODEL_ASSET_VERSION, + MODEL_ID, + SAM_SOURCE_REPO, + SAM_SOURCE_REPO_COMMIT, + Conv2DInplaceLinearSAMMaskDecoderMLP, + Conv2DInplaceLinearSAMTransformerMLPBlock, + SAMMaskDecoderMLP, + SplitHeadSAMDecoderAttention, + SplitHeadSAMEncoderAttention, + sam_decoder_predict_masks, + window_partition_5d, + window_unpartition_5d, +) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, SourceAsRoot from qai_hub_models.utils.base_model import BaseModel, CollectionModel from qai_hub_models.utils.input_spec import InputSpec -# This is a fork of https://github.com/facebookresearch/segment-anything -# with changes to make the SAM decoder traceable -SAM_SOURCE_REPO = "https://github.com/dmckinnon/segment-anything" -SAM_SOURCE_REPO_COMMIT = "0bc06e062ca883c2524bfa79061807c535eb0d51" -MODEL_ID = 
__name__.split(".")[-2] DEFAULT_MODEL_TYPE = "vit_l" SMALL_MODEL_TYPE = "vit_b" MODEL_REGISTERY = { @@ -26,91 +36,235 @@ "vit_l": "sam_vit_l_0b3195.pth", # 308M params "vit_h": "sam_vit_h_4b8939.pth", # 636M params } -MODEL_ASSET_VERSION = 1 + +with SourceAsRoot( + SAM_SOURCE_REPO, + SAM_SOURCE_REPO_COMMIT, + MODEL_ID, + MODEL_ASSET_VERSION, +): + from segment_anything import SamPredictor, sam_model_registry # noqa: F401 + from segment_anything.modeling import image_encoder as sam_image_encoder + from segment_anything.modeling.image_encoder import Block as SAM_Encoder_Block + from segment_anything.modeling.sam import Sam + from segment_anything.modeling.transformer import ( + TwoWayAttentionBlock, + TwoWayTransformer, + ) + from segment_anything.utils.onnx import SamOnnxModel + from segment_anything.utils.transforms import ResizeLongestSide # noqa: F401 + + +# Patch Encoder to use 5D Window Partition (rather than 6D) +setattr(sam_image_encoder, "window_partition", window_partition_5d) +setattr(sam_image_encoder, "window_unpartition", window_unpartition_5d) class SAMQAIHMWrapper(CollectionModel): """ - QAIHM version of segment-anything (https://github.com/dmckinnon/segment-anything) - - QAIHM fork modifies following from parent segment-anything repo: - 1. window_partition in encoder works on rank-5 tensor instead of rank-6 tensor - 2. SamOnnxModel accepts `orig_img_size` to use static upsample instead of dynamic upsample + "Holding class" for all pieces of SAM. 
""" def __init__( self, - sam: torch.nn.Module, - sam_encoder: Callable, - SamOnnxModel, - ResizeLongestSide, - SamPredictor, + sam: Sam, + num_encoder_splits: int, + single_mask_mode: bool = True, ): self.sam = sam - self.sam_encoder = sam_encoder - self.SamOnnxModel = SamOnnxModel - self.ResizeLongestSide = ResizeLongestSide - self.SamPredictor = SamPredictor - - def get_sam(self) -> torch.nn.Module: - return self.sam - - def get_sam_encoder(self) -> Callable: - return self.sam_encoder - - # Create a new decoder - def get_sam_decoder( - self, orig_img_size: tuple[int, int] = (720, 1280), single_mask_mode=True - ) -> Callable: - self.sam_decoder = SegmentAnythingONNXDecoder( - self, - single_mask_mode=single_mask_mode, - orig_img_size=orig_img_size, + self.encoder_splits = SAMEncoderPart.create_with_splits(sam, num_encoder_splits) + self.decoder = SAMDecoder(self.sam, return_single_mask=single_mask_mode) + + @classmethod + def from_pretrained( + cls, model_type: str = SMALL_MODEL_TYPE, num_encoder_splits=5 + ) -> SAMQAIHMWrapper: + sam: Sam = sam_model_registry[model_type](_get_weights_path(model_type)) + + # Normalize pixel_mean and pixel_std for fp ([0, 1]) input + sam.pixel_mean = sam.pixel_mean / 255.0 # [0-255] -> [0, 1] + sam.pixel_std = sam.pixel_std / 255.0 # [0-255] -> [0, 1] + + ### + # Patch the graph for compatibility with QNN. + # + # All below optimizations either optimize for QNN inference speed, + # or fix failures that occur when compiling to QNN. 
@classmethod
def create_with_splits(cls, sam: Sam, num_splits: int) -> list[SAMEncoderPart]:
    """
    Split the SAM image encoder into parts that are executed one after another.

    The encoder's transformer blocks are divided into ``num_splits + 1``
    contiguous ranges of ``len(blocks) // (num_splits + 1)`` blocks each.
    When the block count is not evenly divisible, the final part also takes
    the remaining blocks.

    Parameters:
        sam:
            SAM model whose ``image_encoder.blocks`` are partitioned.
        num_splits:
            Number of cut points made in the sequence of transformer blocks.
            0 yields a single end-to-end encoder; N yields N + 1 parts.

    Returns:
        ``num_splits + 1`` encoder parts, in the order they should be executed.
        The first part is created with ``include_embedding=True`` and the last
        part with ``include_neck=True``; with ``num_splits == 0`` the single
        part has both.

    Raises:
        ValueError: if ``num_splits`` is negative.
    """
    if num_splits < 0:
        raise ValueError(f"num_splits must be >= 0, got {num_splits}")

    n_transformer_blocks = len(sam.image_encoder.blocks)
    n_parts = num_splits + 1
    blocks_per_part = n_transformer_blocks // n_parts

    # Half-open [start, end) block ranges, one per part.
    split_idx: list[tuple[int, int]] = [
        (i * blocks_per_part, (i + 1) * blocks_per_part) for i in range(n_parts)
    ]

    # Make sure the final part ends at the last transformer block. This matters
    # when n_parts does not evenly divide n_transformer_blocks. It also gives the
    # single-part case an explicit (0, n_transformer_blocks) range: relying on the
    # constructor default of (0, -1) would be applied as a slice and silently
    # drop the final transformer block.
    split_idx[-1] = (split_idx[-1][0], n_transformer_blocks)

    last_part = n_parts - 1
    return [
        cls(
            sam,
            include_embedding=(i == 0),
            include_transformer_blocks=block_range,
            include_neck=(i == last_part),
        )
        for i, block_range in enumerate(split_idx)
    ]
+ Shape (batch_size, 64, 64, 768) """ - return self.sam.image_encoder(image) + x = xOrImage + if self.include_embedding: + x = self.sam.preprocess(x) + x = self.sam.image_encoder.patch_embed(x) + if self.sam.image_encoder.pos_embed is not None: + x = x + self.sam.image_encoder.pos_embed + + if self.include_transformer_blocks is not None: + for blk in self.sam.image_encoder.blocks[self.include_transformer_blocks[0] : self.include_transformer_blocks[1]]: # type: ignore + x = blk(x) + + if self.include_neck: + x = self.sam.image_encoder.neck(x.permute(0, 3, 1, 2)) + + return x + + @staticmethod + def get_input_spec( + batch_size: int = 1, + encoder_img_height: int = 1024, # self.sam.image_encoder.img_size + encoder_img_width: int = 1024, # self.sam.image_encoder.img_size + include_embedding: bool = True, + embedding_size: int = 768, + ) -> InputSpec: + # Get the input specification ordered (name -> (shape, type)) pairs for this model. + # + # This can be used with the qai_hub python API to declare + # the model input specification upon submitting a profile job. + if include_embedding: + return { + "image": ( + (batch_size, 3, encoder_img_height, encoder_img_width), + "float32", + ) + } + else: + return {"x": ((batch_size, 64, 64, embedding_size), "float32")} def _get_input_spec_for_instance( self, @@ -118,44 +272,41 @@ def _get_input_spec_for_instance( ) -> InputSpec: """ Override for model.get_input_spec() when called on instances of this class. - - The initializer for BaseModel will automatically override get_input_spec - with this function when the class is instantiated. 
""" return self.__class__.get_input_spec( batch_size, self.sam.image_encoder.img_size, self.sam.image_encoder.img_size, + self.include_embedding, + self.sam.image_encoder.blocks[0].attn.in_feature, ) @staticmethod - def get_input_spec( - batch_size: int = 1, - encoder_img_height: int = 1024, # self.sam.image_encoder.img_size[0] - encoder_img_width: int = 1024, # self.sam.image_encoder.img_size[1] - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. - return { - "image": ( - (batch_size, 3, encoder_img_height, encoder_img_width), - "float32", - ) - } + def get_channel_last_inputs(include_embedding=True) -> list[str]: + return list( + SAMEncoderPart.get_input_spec(include_embedding=include_embedding).keys() + ) - @staticmethod - def get_channel_last_inputs() -> list[str]: - return ["image"] + def _get_channel_last_inputs_for_instance(self) -> list[str]: + return self.__class__.get_channel_last_inputs(self.include_embedding) @staticmethod - def get_channel_last_outputs() -> list[str]: - return ["image_embeddings"] + def get_channel_last_outputs(include_neck=True) -> list[str]: + return SAMEncoderPart.get_output_names(include_neck) + + def _get_channel_last_outputs_for_instance(self) -> list[str]: + return self.__class__.get_channel_last_outputs(self.include_neck) @staticmethod - def get_output_names() -> list[str]: - return ["image_embeddings"] + def get_output_names(include_neck=True) -> list[str]: + return ( + ["image_embeddings"] + if include_neck + else ["intermediate_SAM_encoder_output"] + ) + + def _get_output_names_for_instance(self): + return SAMEncoderPart.get_output_names(self.include_neck) def preprocess_input_image(self, input_image: np.ndarray): """Transform input image to work with SAM encoder""" @@ -171,36 +322,47 @@ def preprocess_input_image(self, input_image: np.ndarray): 
return self.sam.preprocess(transformed_image) @classmethod - def from_pretrained(cls): - return SAMQAIHMWrapper.from_pretrained().get_sam_encoder() + def from_pretrained(cls, model_type: str = SMALL_MODEL_TYPE): + return SAMQAIHMWrapper.from_pretrained( + model_type, num_encoder_splits=0 + ).encoder_splits[0] -class SegmentAnythingONNXDecoder(BaseModel): - """Exportable SAM decoder""" +class SAMDecoder(BaseModel): + """ + Adapted from from segment_anything.utils.onnx.SamOnnxModel with modifications. - def __init__( - self, - sam_qaihm_wrapper: SAMQAIHMWrapper, - orig_img_size: tuple[int, int] = (720, 1280), - single_mask_mode: bool = True, - ) -> None: + This removes output mask resizing. Because this requires a dynamic shape to accomplish + in the network, it's better to do this as a postprocessing step rather than in the inference + framework itself. + """ + + def __init__(self, sam: Sam, return_single_mask: bool): super().__init__() - self.sam = sam_qaihm_wrapper.get_sam() - self.sam_decoder = sam_qaihm_wrapper.SamOnnxModel( - self.sam, orig_img_size=orig_img_size, return_single_mask=single_mask_mode - ) - self.transforms = sam_qaihm_wrapper.ResizeLongestSide( - self.sam.image_encoder.img_size - ) + self.model = sam + self.embed_size = self.model.prompt_encoder.image_embedding_size + self.img_size = sam.image_encoder.img_size + self.return_single_mask = return_single_mask + + def _embed_masks(self, input_mask: torch.Tensor | None) -> torch.Tensor: + """ + Lifted from segment_anything.utils.onnx.SamOnnxModel + + Modified to remove ops based on whether input_mask is set. 
+ """ + if input_mask is not None: + return self.model.prompt_encoder.mask_downscaling(input_mask) + return torch.zeros( + 1, 1, *self.embed_size + ) + self.model.prompt_encoder.no_mask_embed.weight.reshape(1, -1, 1, 1) def forward( self, image_embeddings: torch.Tensor, point_coords: torch.Tensor, point_labels: torch.Tensor, - mask_input: torch.Tensor, - has_mask_input: torch.Tensor, - ) -> torch.Tensor: + mask_input: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: """ Run SAM lightweight decoder and return generated mask for given points @@ -212,27 +374,35 @@ def forward( point_labels: torch.Tensor of shape [1, k] Point Labels to select/de-select given point for segmentation e.g. Corresponding value is 1 if this point is to be included, otherwise 0 - mask_input: torch.Tensor of shape [1, 1, 4 * image_emd_size, 4 * image_emb_size] - Input mask to consider for segmentation. If using point based segmentation, set this to torch.zeros() - has_mask_input: torch.Tensor of shape [1] - If has value [1] then mask_input is used, otherwise no. - If using point based segmentation, can set this to [0] + mask_input: torch.Tensor of shape [1, 1, 4 * self.embed_size, 4 * self.embed_size] + Input mask to consider for segmentation. If using point based segmentation, this is unused. Returns: - upscaled_masks: torch.Tensor of shape [1, k, ] - score: torch.Tensor of shape [1, k] masks: torch.Tensor of shape [1, k, 256, 256] - Use this low resolution masks to further slice and upscale for resolutions that Decoder is not intialized to. 
@staticmethod
def get_channel_last_inputs(has_mask_input: bool = False) -> list[str]:
    """
    List the decoder input names reported as channel-last.

    "image_embeddings" is always listed; "mask_input" is appended only when
    has_mask_input is True. A new list is built on every call.
    """
    optional_inputs = ["mask_input"] if has_mask_input else []
    return ["image_embeddings", *optional_inputs]
def window_partition_5d(
    x: torch.Tensor, window_size: int
) -> tuple[torch.Tensor, tuple[int, int]]:
    """
    ---
    Lifted from segment_anything.modeling.image_encoder.window_partition
    Modified by Qualcomm to work in 5D rather than 6D.
    ---

    Cut a [B, H, W, C] token grid into non-overlapping square windows,
    zero-padding the bottom / right edges when H or W is not a multiple of
    the window size.

    Args:
        x (tensor): input tokens with [B, H, W, C].
        window_size (int): side length of each window.

    Returns:
        windows: windows after partition with [B * num_windows, window_size, window_size, C].
        (Hp, Wp): padded height and width before partition
    """
    batch, height, width, channels = x.shape

    # Amount needed to round each spatial dim up to a multiple of window_size.
    extra_h = (window_size - height % window_size) % window_size
    extra_w = (window_size - width % window_size) % window_size
    if extra_h or extra_w:
        # F.pad takes pairs starting from the last dim: (C, C, W, W, H, H).
        x = F.pad(x, (0, 0, 0, extra_w, 0, extra_h))
    padded_h = height + extra_h
    padded_w = width + extra_w

    # -- Begin Qualcomm Modification --
    # Batch and window-row are folded into one leading dim so the
    # intermediate tensor is rank 5 instead of rank 6.
    grid = x.reshape(
        batch * padded_h // window_size,
        window_size,
        padded_w // window_size,
        window_size,
        channels,
    )
    grid = grid.permute(0, 2, 1, 3, 4).contiguous()
    windows = grid.view(-1, window_size, window_size, channels)
    # -- End Qualcomm Modification --
    return windows, (padded_h, padded_w)
class Conv2DInplaceLinear(nn.Module):
    """
    An implementation of Linear / Conv1D that uses a 1x1 Conv2D op instead.

    The Conv2D implementation for Qualcomm DSPs is faster than the Linear/Conv1D implementation.

    The module is applied like a Linear layer: the LAST dimension of the
    input is the feature dimension. Inputs of rank 2, 3 or 4 are accepted and
    the output has the same rank as the input.
    """

    @staticmethod
    def from_linear(mod: torch.nn.Linear | torch.nn.Conv1d) -> Conv2DInplaceLinear:
        """
        Build an equivalent Conv2DInplaceLinear from an existing layer.

        Parameters:
            mod:
                Either a Linear layer, or a pointwise Conv1d (kernel size 1,
                stride 1, no padding, no dilation, 1 group). A converted
                Conv1d is applied over the last input dimension, like Linear.

        Returns:
            A new module holding copies of the weight and (if present) bias
            of ``mod``, created on the same device as ``mod``'s weight.

        Raises:
            NotImplementedError: for any other module type, or for a Conv1d
                that is not pointwise and so has no Linear equivalent.
        """
        if isinstance(mod, torch.nn.Linear):
            weight, bias = mod.weight, mod.bias
        elif isinstance(mod, torch.nn.Conv1d):
            is_pointwise = (
                mod.kernel_size == (1,)
                and mod.stride == (1,)
                and mod.dilation == (1,)
                and mod.groups == 1
                and mod.padding in ((0,), "valid", "same")
            )
            if not is_pointwise:
                raise NotImplementedError(
                    "Only pointwise (kernel_size=1) Conv1d layers can be converted."
                )
            # Conv1d weight is (out, in, kernel=1); drop the kernel axis to get
            # the same (out, in) layout a Linear weight has.
            weight, bias = mod.weight[:, :, 0], mod.bias
        else:
            raise NotImplementedError()

        out_features, in_features = weight.shape
        linear = Conv2DInplaceLinear(
            in_features,
            out_features,
            has_bias=bias is not None,
            device=weight.device,
        )
        with torch.no_grad():
            # (out, in) -> (out, in, 1, 1): a 1x1 convolution kernel.
            linear.conv2d.weight.copy_(weight[:, :, None, None])
            if bias is not None:
                assert linear.conv2d.bias is not None
                linear.conv2d.bias.copy_(bias)

        return linear

    def __init__(
        self,
        in_features,
        out_features,
        has_bias: bool = True,
        device: torch.device | None = None,
    ):
        """
        Parameters:
            in_features: size of the last dimension of the input.
            out_features: size of the last dimension of the output.
            has_bias: whether the underlying convolution has a bias term.
            device: device to create the convolution on (default device if None).
        """
        super().__init__()
        self.conv2d = torch.nn.Conv2d(in_features, out_features, 1, bias=has_bias)
        if device is not None:
            self.conv2d.to(device)

    def __getattr__(self, attr):
        """
        Forward unknown attribute lookups (weight, bias, in_channels, ...)
        to the wrapped Conv2d.
        """
        # Go through __dict__ directly: touching self._modules before it exists
        # would re-enter __getattr__ and recurse.
        modules = self.__dict__.get("_modules")
        if modules is None or "conv2d" not in modules:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{attr}'"
            )
        conv2d = modules["conv2d"]
        if attr == "conv2d":
            return conv2d
        return getattr(conv2d, attr)

    def forward(self, x: torch.Tensor):
        """
        Apply the layer over the last dimension of ``x``.

        Parameters:
            x: tensor of rank 2, 3 or 4 whose last dimension is in_features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of out_features.

        Raises:
            ValueError: if ``x`` is not rank 2, 3 or 4.
        """
        ndim = x.ndim
        # Bring the input to rank 4 with features last.
        if ndim == 2:
            x = x.unsqueeze(0).unsqueeze(1)
        elif ndim == 3:
            x = x.unsqueeze(1)
        elif ndim != 4:
            raise ValueError(
                f"Conv2DInplaceLinear expects a rank 2, 3 or 4 input, got rank {ndim}"
            )

        x = x.permute(0, 3, 1, 2)  # features last -> channels first for Conv2d
        x = self.conv2d(x)
        x = x.permute(0, 2, 3, 1)  # back to features last

        # Remove exactly the dimensions that were added above.
        if ndim == 2:
            x = x.squeeze(1).squeeze(0)
        elif ndim == 3:
            x = x.squeeze(1)
        return x
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Run attention over a [B, H, W, C] token grid, one head at a time.

    Each head uses its own q / k / v projection. The per-head results are
    concatenated along the feature dimension and passed through the output
    projection. The result has shape [B, H, W, -1].
    """
    batch, height, width, _ = x.shape
    n_tokens = height * width

    # Original (packed) code, kept for reference:
    #   # qkv with shape (3, B, nHead, H * W, C)
    #   qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
    #   # q, k, v with shape (B * nHead, H * W, C)
    #   q0, k0, v0 = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0)

    def as_single_head(projected: torch.Tensor) -> torch.Tensor:
        # [B, H, W, c] -> [B, 1, H * W, c]
        return projected.reshape(batch, n_tokens, 1, -1).permute(0, 2, 1, 3)

    per_head: list[torch.Tensor] = []
    for head in range(self.num_heads):
        query = as_single_head(self.q[head](x))
        key = as_single_head(self.k[head](x))
        value = as_single_head(self.v[head](x))

        scores = (query * self.scale) @ key.transpose(-2, -1)
        if self.use_rel_pos:
            scores = SplitHeadSAMEncoderAttention.add_decomposed_rel_pos_unpack(
                scores,
                query,
                self.rel_pos_h,
                self.rel_pos_w,
                (height, width),
                (height, width),
            )

        weights = scores.softmax(dim=-1)
        per_head.append((weights @ value).reshape(batch, 1, n_tokens, -1))

    # [B, nHead, H * W, c] -> [B, H, W, nHead * c]
    merged = torch.cat(per_head, dim=1)
    merged = merged.reshape(batch, self.num_heads, height, width, -1)
    merged = merged.permute(0, 2, 3, 1, 4).reshape(batch, height, width, -1)

    return self.proj(merged)
+ + Returns: + attn (Tensor): attention map with added relative positional embeddings. + """ + q_h, q_w = q_size + k_h, k_w = k_size + Rh = get_rel_pos(q_h, k_h, rel_pos_h) + Rw = get_rel_pos(q_w, k_w, rel_pos_w) + + # -- Begin Qualcomm Change + B, _, _, dim = q.shape + r_q = q.reshape(B, q_h, q_w, dim) + rel_h = SplitHeadSAMEncoderAttention.einsum_to_matmul_bhwc_hkc_bhwk(r_q, Rh) + rel_w = SplitHeadSAMEncoderAttention.einsum_to_matmul_bhwc_wkc_bhwk(r_q, Rw) + # -- End Qualcomm Change + + attn = ( + attn.view(B, q_h, q_w, k_h, k_w) + + rel_h[:, :, :, :, None] + + rel_w[:, :, :, None, :] + ).view(B, 1, q_h * q_w, k_h * k_w) + + return attn + + +class SplitHeadSAMDecoderAttention(nn.Module): + def __init__(self, attention_block: SAMDecoderAttention): + super().__init__() + self.embedding_dim = attention_block.embedding_dim # in features + self.internal_dim = attention_block.internal_dim # out features + + self.num_heads = attention_block.num_heads + self.in_features = self.embedding_dim + self.out_features = self.internal_dim + self.qkv_out_features_per_head = self.out_features // self.num_heads + + self.qproj = torch.nn.ModuleList() + self.kproj = torch.nn.ModuleList() + self.vproj = torch.nn.ModuleList() + self.out_proj = attention_block.out_proj + self.out_projs = [] + for i in range(attention_block.num_heads): + for projList, merged_layer in [ + (self.qproj, attention_block.q_proj), + (self.kproj, attention_block.k_proj), + (self.vproj, attention_block.v_proj), + ]: + split_layer = Conv2DInplaceLinear( + self.in_features, self.qkv_out_features_per_head + ) + split_layer.conv2d.weight.data.copy_( + merged_layer.weight[ + i + * self.qkv_out_features_per_head : (i + 1) + * self.qkv_out_features_per_head, + :, + None, + None, + ] + ) + + split_layer.conv2d.bias.data.copy_( + merged_layer.bias[ + i + * self.qkv_out_features_per_head : (i + 1) + * self.qkv_out_features_per_head + ] + ) + projList.append(split_layer) + + def forward( + self, q: torch.Tensor, k: 
torch.Tensor, v: torch.Tensor + ) -> torch.Tensor: + attns = [] + for i in range(0, self.num_heads): + # Single head attention + qOut: torch.Tensor = self.qproj[i](q) + kOut: torch.Tensor = self.kproj[i](k) + vOut: torch.Tensor = self.vproj[i](v) + attn = qOut @ kOut.transpose(-2, -1) + attn = attn / math.sqrt(self.qkv_out_features_per_head) + attn = torch.softmax(attn, dim=-1) + attns.append(attn @ vOut) + + # Combine heads + return self.out_proj(torch.cat(attns, -1)) + + +def sam_decoder_predict_masks( + self: SAMMaskDecoder, + image_embeddings: torch.Tensor, + image_pe: torch.Tensor, + sparse_prompt_embeddings: torch.Tensor, + dense_prompt_embeddings: torch.Tensor, +): + """ + SamMaskDecoder.predict_masks modified to skip the per-image batch expansion if no expansion is required. + + If no expansion is required, a noop is left in the graph, which causes compilation to QNN to fail. + + Repeat-interleave also generates a 5D tensor, which causes compilation to fail, so it is replaced with Tile. 
+ """ + output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0) + output_tokens = output_tokens.unsqueeze(0).expand( + sparse_prompt_embeddings.size(0), -1, -1 + ) + tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1) + + # Expand per-image data in batch direction to be per-mask + # -- Begin Qualcomm Modification -- + num_mask = tokens.shape[0] + if num_mask != 1: + tile_dims = [1] * len(image_embeddings.shape) + tile_dims[0] = num_mask + src = torch.tile(image_embeddings, tile_dims) + + tile_dims = [1] * len(image_pe.shape) + tile_dims[0] = num_mask + pos_src = torch.tile(image_pe, tile_dims) + else: + src = image_embeddings + pos_src = image_pe + + src = src + dense_prompt_embeddings + b, c, h, w = src.shape + # -- End Qualcomm Modification -- + + # Run the transformer + hs, src = self.transformer(src, pos_src, tokens) + iou_token_out = hs[:, 0, :] + mask_tokens_out = hs[:, 1 : (1 + self.num_mask_tokens), :] + + # Upscale mask embeddings and predict masks using the mask tokens + src = src.transpose(1, 2).view(b, c, h, w) + upscaled_embedding = self.output_upscaling(src) + hyper_in_list: list[torch.Tensor] = [] + for i in range(self.num_mask_tokens): + hyper_in_list.append( + self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]) + ) + hyper_in = torch.stack(hyper_in_list, dim=1) + b, c, h, w = upscaled_embedding.shape + masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w) + + # Generate mask quality predictions + iou_pred = self.iou_prediction_head(iou_token_out) + + return masks, iou_pred + + +class Conv2DInplaceLinearSAMTransformerMLPBlock(nn.Module): + """ + SAM MLPBlock that uses 1x1 Conv2D in place of linear layers. 
+ """ + + def __init__(self, mlp_block: SAMTransformerMLPBlock): + super().__init__() + self.lin1 = Conv2DInplaceLinear.from_linear(mlp_block.lin1) + self.lin2 = Conv2DInplaceLinear.from_linear(mlp_block.lin2) + self.act = mlp_block.act + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.lin2(self.act(self.lin1(x))) + + +class Conv2DInplaceLinearSAMMaskDecoderMLP(nn.Module): + """ + SAM MLP that uses 1x1 Conv2D in place of linear layers. + """ + + def __init__(self, mlp: SAMMaskDecoderMLP): + super().__init__() + self.layers = nn.ModuleList() + self.num_layers = mlp.num_layers + self.sigmoid_output = mlp.sigmoid_output + for module in mlp.layers: + assert isinstance(module, nn.Linear) + self.layers.append(Conv2DInplaceLinear.from_linear(module)) + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) + if self.sigmoid_output: + x = F.sigmoid(x) + return x diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 0439892a..b658cae4 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -44,22 +44,53 @@ aggregated: models: - name: SAMDecoder universal_assets: - torchscript_onnx_tflite: mq24762lm + torchscript_onnx_tflite: mq36e023q + torchscript_onnx: mqe7xlyvm performance_metrics: - torchscript_onnx_tflite: - inference_time: 29311.0 - throughput: 34.11688444611238 + inference_time: 7557.0 + throughput: 132.32764324467382 estimated_peak_memory_range: - min: 4026368 - max: 41595360 + min: 294912 + max: 32109448 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 845 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jp2k4x14p + total_layers: 845 + job_id: j5welzy45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7052.0 + throughput: 141.80374361883153 + estimated_peak_memory_range: + min: 4235264 + max: 20294864 + primary_compute_unit: 
NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jped7x085 + job_status: Passed + torchscript_onnx: + inference_time: 11415.0 + throughput: 87.60402978537013 + estimated_peak_memory_range: + min: 28672 + max: 278927600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 868 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 868 + job_id: jgz3lyez5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -68,21 +99,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:34:16Z' + timestamp: '2024-12-11T22:59:03Z' - torchscript_onnx_tflite: - inference_time: 20843.0 - throughput: 47.97773832941515 + inference_time: 5230.0 + throughput: 191.20458891013385 + estimated_peak_memory_range: + min: 49152 + max: 40879744 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: j5mn0k97p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 4973.0 + throughput: 201.08586366378444 + estimated_peak_memory_range: + min: 4210688 + max: 48099648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jp4ly4625 + job_status: Passed + torchscript_onnx: + inference_time: 7741.0 + throughput: 129.1822761917065 estimated_peak_memory_range: - min: 3977216 - max: 125267776 + min: 4780032 + max: 131911280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 868 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jp0zd4w65 + total_layers: 868 + job_id: jpxklrvj5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -91,21 +152,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:34:18Z' + timestamp: 
'2024-12-11T22:59:09Z' - torchscript_onnx_tflite: - inference_time: 20649.0 - throughput: 48.4284953266502 + inference_time: 5180.0 + throughput: 193.05019305019306 + estimated_peak_memory_range: + min: 274432 + max: 36221120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: jgke200vg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 3787.0 + throughput: 264.06126221283336 + estimated_peak_memory_range: + min: 4206592 + max: 44127088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jp0zm9w05 + job_status: Passed + torchscript_onnx: + inference_time: 8231.0 + throughput: 121.49192078726765 estimated_peak_memory_range: - min: 3633152 - max: 134462976 + min: 151552 + max: 90336272 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 868 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jgkeov12g + total_layers: 868 + job_id: jp8qer9zp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -114,21 +205,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:34:20Z' + timestamp: '2024-12-11T22:59:15Z' - torchscript_onnx_tflite: - inference_time: 29453.0 - throughput: 33.952398736970764 + inference_time: 7484.0 + throughput: 133.61838588989846 + estimated_peak_memory_range: + min: 286720 + max: 34810360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: jgjvryy1g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6814.0 + throughput: 146.75667742882302 estimated_peak_memory_range: - min: 3973120 - max: 42990856 + min: 4288512 + max: 5538064 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 
838 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jglvo4d85 + total_layers: 838 + job_id: jgo2lwx1p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -137,29 +243,74 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:34:22Z' - - reference_device_info: + timestamp: '2024-12-11T22:58:14Z' + - torchscript_onnx_tflite: + inference_time: 52929.0 + throughput: 18.89323433278543 + estimated_peak_memory_range: + min: 299008 + max: 33837488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: j57yennn5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 50031.0 + throughput: 19.987607683236394 + estimated_peak_memory_range: + min: 1609728 + max: 11906416 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jp4ly47q5 + job_status: Passed + reference_device_info: name: SA7255P ADP os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:34:23Z' + timestamp: '2024-12-11T22:58:26Z' - torchscript_onnx_tflite: - inference_time: 29485.0 - throughput: 33.91555027980329 + inference_time: 7484.0 + throughput: 133.61838588989846 + estimated_peak_memory_range: + min: 286720 + max: 33339136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: jpy1o220p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6852.0 + throughput: 145.94279042615295 estimated_peak_memory_range: - min: 4018176 - max: 37988176 + min: 4284416 + max: 5614280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 838 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jpv6eq8j5 + total_layers: 838 + job_id: jp0zm9n25 
job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -168,21 +319,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:34:25Z' + timestamp: '2024-12-11T22:58:33Z' - torchscript_onnx_tflite: - inference_time: 36583.0 - throughput: 27.335101003198208 + inference_time: 9976.0 + throughput: 100.24057738572574 + estimated_peak_memory_range: + min: 299008 + max: 36531104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: jp3jz7rmg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 9050.0 + throughput: 110.49723756906077 estimated_peak_memory_range: - min: 3997696 - max: 120024144 + min: 53248 + max: 6362528 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 838 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: jped8oq15 + total_layers: 838 + job_id: jgo2lwj4p job_status: Passed reference_device_info: name: SA8295P ADP @@ -191,21 +357,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:34:27Z' + timestamp: '2024-12-11T22:58:39Z' - torchscript_onnx_tflite: - inference_time: 29064.0 - throughput: 34.40682631434076 + inference_time: 7477.0 + throughput: 133.74348000534974 estimated_peak_memory_range: - min: 4005888 - max: 38547920 + min: 299008 + max: 30539392 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 845 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: j5we8wj65 + total_layers: 845 + job_id: jg9lz2qmg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 6893.0 + throughput: 145.0747134774409 + estimated_peak_memory_range: + min: 4239360 + max: 5792616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jp14n1rkp job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -214,29 +395,74 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:34:29Z' - - reference_device_info: + timestamp: '2024-12-11T22:58:45Z' + - torchscript_onnx_tflite: + inference_time: 10510.0 + throughput: 95.14747859181732 + estimated_peak_memory_range: + min: 299008 + max: 35611360 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 845 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 845 + job_id: jgn6zqwj5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 9667.0 + throughput: 103.44470880314472 + estimated_peak_memory_range: + min: 462848 + max: 6443936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jprvldkvg + job_status: Passed + reference_device_info: name: SA8775P ADP os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:34:31Z' + timestamp: '2024-12-11T22:58:52Z' - torchscript_onnx_tflite: - inference_time: 33135.0 - throughput: 30.17956843217142 + inference_time: 8533.0 + throughput: 117.19207781553968 estimated_peak_memory_range: - min: 4005888 - max: 131812544 + min: 278528 + max: 38706480 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 337 + layers_on_npu: 845 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 337 - job_id: j57yk2ql5 + total_layers: 845 + job_id: j5q6l1xep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 8315.0 + throughput: 120.26458208057727 + estimated_peak_memory_range: + min: 4210688 + max: 46377424 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jglvy8ne5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -245,25 +471,94 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
QCS8450 Proxy - timestamp: '2024-11-26T01:34:33Z' -- name: SAMEncoder + timestamp: '2024-12-11T22:58:58Z' + - torchscript_onnx_qnn: + inference_time: 7458.0 + throughput: 134.08420488066506 + estimated_peak_memory_range: + min: 4202496 + max: 4202496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 838 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 838 + job_id: jp14n19np + job_status: Passed + torchscript_onnx: + inference_time: 14750.0 + throughput: 67.79661016949153 + estimated_peak_memory_range: + min: 13004800 + max: 13004800 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 868 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 868 + job_id: jpv6lmo75 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:20Z' +- name: SAMEncoderPart1 universal_assets: - torchscript_onnx_tflite: mm6v8j8vq + torchscript_onnx_tflite: mn0jx893m + torchscript_onnx: mngg1lern performance_metrics: - torchscript_onnx_tflite: - inference_time: 10369270.0 - throughput: 0.09643880427455356 + inference_time: 208201.0 + throughput: 4.803050897930365 + estimated_peak_memory_range: + min: 12722176 + max: 77917512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jg9lz2omg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 203885.0 + throughput: 4.904725703215048 + estimated_peak_memory_range: + min: 12673024 + max: 70290184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jgz3lyq45 + job_status: Passed + torchscript_onnx: + inference_time: 168865.0 + throughput: 5.921890267373346 estimated_peak_memory_range: - min: 129208320 - max: 132578536 - 
primary_compute_unit: CPU - precision: fp32 + min: 12713984 + max: 59699960 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jpy1qzl7p + layers_on_npu: 623 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 623 + job_id: j5welzoz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -272,21 +567,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:34:17Z' + timestamp: '2024-12-11T22:59:04Z' - torchscript_onnx_tflite: - inference_time: 9136859.0 - throughput: 0.10944680223258343 + inference_time: 149759.0 + throughput: 6.677395014656882 + estimated_peak_memory_range: + min: 11546624 + max: 691633680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jgn6zq1j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 144476.0 + throughput: 6.921564827376173 estimated_peak_memory_range: - min: 95559680 - max: 1681147984 - primary_compute_unit: CPU - precision: fp32 + min: 381759488 + max: 1055182048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jpxklr885 + job_status: Passed + torchscript_onnx: + inference_time: 121680.0 + throughput: 8.21827744904668 + estimated_peak_memory_range: + min: 12288 + max: 1294151072 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jp8q62nxp + layers_on_npu: 623 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 623 + job_id: j5mn0kryp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -295,21 +620,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:34:19Z' + timestamp: 
'2024-12-11T22:59:09Z' - torchscript_onnx_tflite: - inference_time: 6694410.0 - throughput: 0.14937836194675858 + inference_time: 128622.0 + throughput: 7.774719721354045 + estimated_peak_memory_range: + min: 11673600 + max: 698964784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: j5q6l11ep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 141432.0 + throughput: 7.070535663781888 + estimated_peak_memory_range: + min: 3276800 + max: 686809248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jp8qernqp + job_status: Passed + torchscript_onnx: + inference_time: 96596.0 + throughput: 10.352395544328958 estimated_peak_memory_range: - min: 115359744 - max: 1666486432 - primary_compute_unit: CPU - precision: fp32 + min: 40955904 + max: 1138625856 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: j5q6z0n4p + layers_on_npu: 623 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 623 + job_id: jgke20nyg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -318,21 +673,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:34:20Z' + timestamp: '2024-12-11T22:59:15Z' - torchscript_onnx_tflite: - inference_time: 10714354.0 - throughput: 0.09333273849268 + inference_time: 205315.0 + throughput: 4.870564741981833 + estimated_peak_memory_range: + min: 12701696 + max: 85441760 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jped7xx85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 177157.0 + throughput: 5.644710623909865 estimated_peak_memory_range: - min: 128430080 - max: 399974032 - 
primary_compute_unit: CPU - precision: fp32 + min: 13033472 + max: 14256488 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: j56yr2x0p + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jpv6lm8z5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -341,21 +711,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:34:22Z' + timestamp: '2024-12-11T22:58:14Z' - torchscript_onnx_tflite: - inference_time: 17192024.0 - throughput: 0.05816650791087774 + inference_time: 1173558.0 + throughput: 0.8521095676566476 estimated_peak_memory_range: - min: 130936832 - max: 1674637312 - primary_compute_unit: CPU - precision: fp32 + min: 2490368 + max: 677174256 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jp4ly4425 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 1100710.0 + throughput: 0.9085045107248958 + estimated_peak_memory_range: + min: 3821568 + max: 13681136 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jgo2ozxxp + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jpxklrqj5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -364,21 +749,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:34:24Z' + timestamp: '2024-12-11T22:58:27Z' - torchscript_onnx_tflite: - inference_time: 11081197.0 - throughput: 0.09024295840963752 + inference_time: 208837.0 + throughput: 4.788423507328682 + estimated_peak_memory_range: + min: 12705792 + max: 73067568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 585 + job_id: jp0zm9x05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 178440.0 + throughput: 5.604124635731899 estimated_peak_memory_range: - min: 129552384 - max: 133118776 - primary_compute_unit: CPU - precision: fp32 + min: 13045760 + max: 14313072 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jgjvod9xg + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jp8qerlzp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -387,21 +787,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:34:26Z' + timestamp: '2024-12-11T22:58:33Z' - torchscript_onnx_tflite: - inference_time: 10359771.0 - throughput: 0.09652723018684486 + inference_time: 242752.0 + throughput: 4.119430529923544 + estimated_peak_memory_range: + min: 12693504 + max: 670819568 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jgo2lw91p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 206972.0 + throughput: 4.831571420288735 estimated_peak_memory_range: - min: 129806336 - max: 1734033616 - primary_compute_unit: CPU - precision: fp32 + min: 245760 + max: 6692272 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jgz3826k5 + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jpv6lmj75 job_status: Passed reference_device_info: name: SA8295P ADP @@ -410,21 +825,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:34:28Z' + timestamp: '2024-12-11T22:58:39Z' - torchscript_onnx_tflite: - inference_time: 10872614.0 - throughput: 0.09197420233993407 + inference_time: 208397.0 + throughput: 
4.7985335681415755 + estimated_peak_memory_range: + min: 12705792 + max: 68306448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jp14n1mnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 177673.0 + throughput: 5.628317189443528 estimated_peak_memory_range: - min: 124084224 - max: 130684168 - primary_compute_unit: CPU - precision: fp32 + min: 13344768 + max: 14993360 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jg9lk06lg + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jgdxd4jkp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -433,21 +863,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:34:30Z' + timestamp: '2024-12-11T22:58:46Z' - torchscript_onnx_tflite: - inference_time: 13361634.0 - throughput: 0.07484114592571538 + inference_time: 251286.0 + throughput: 3.9795293012742454 + estimated_peak_memory_range: + min: 12713984 + max: 687396512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jprvld7kg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 211825.0 + throughput: 4.720878083323498 estimated_peak_memory_range: - min: 130527232 - max: 1673934656 - primary_compute_unit: CPU - precision: fp32 + min: 3305472 + max: 9062624 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jgdx8njep + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jp2krd8xp job_status: Passed reference_device_info: name: SA8775P ADP @@ -456,27 +901,1955 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - 
timestamp: '2024-11-26T01:34:31Z' + timestamp: '2024-12-11T22:58:52Z' - torchscript_onnx_tflite: - inference_time: 16830940.0 - throughput: 0.059414388025861894 + inference_time: 226498.0 + throughput: 4.4150500225167555 estimated_peak_memory_range: - min: 86020096 - max: 1728387728 - primary_compute_unit: CPU - precision: fp32 + min: 12722176 + max: 1043452320 + primary_compute_unit: NPU + precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 36 - layers_on_cpu: 782 - total_layers: 818 - job_id: jp4lmnzv5 + layers_on_npu: 585 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 585 + job_id: jglvy8925 job_status: Passed - reference_device_info: - name: QCS8450 (Proxy) - os: '13' - form_factor: Xr - os_name: Android + torchscript_onnx_qnn: + inference_time: 219929.0 + throughput: 4.546921961178381 + estimated_peak_memory_range: + min: 8990720 + max: 1011265312 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: j56y8m6vp + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:58:58Z' + - torchscript_onnx_qnn: + inference_time: 171827.0 + throughput: 5.819807131591659 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 626 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 626 + job_id: jgdxd4k6p + job_status: Passed + torchscript_onnx: + inference_time: 179691.0 + throughput: 5.565108992659622 + estimated_peak_memory_range: + min: 40386560 + max: 40386560 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 623 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 623 + job_id: jgjvrym7g + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: 
Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:21Z' +- name: SAMEncoderPart2 + universal_assets: + torchscript_onnx_tflite: mm5ed7w4m + torchscript_onnx: mn7xlr5vq + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 670133.0 + throughput: 1.492241092439859 + estimated_peak_memory_range: + min: 12804096 + max: 108543136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp14n1onp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 838379.0 + throughput: 1.1927779679595982 + estimated_peak_memory_range: + min: 12877824 + max: 114030152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5welz045 + job_status: Passed + torchscript_onnx: + inference_time: 722337.0 + throughput: 1.3843953722431497 + estimated_peak_memory_range: + min: 225280 + max: 56282448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jg9lz2vqg + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:59:04Z' + - torchscript_onnx_tflite: + inference_time: 556374.0 + throughput: 1.797352140826135 + estimated_peak_memory_range: + min: 11923456 + max: 1144561408 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jprvldxkg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 668897.0 + throughput: 1.49499848257654 + estimated_peak_memory_range: + min: 12599296 + max: 1167377808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + 
layers_on_cpu: 0 + total_layers: 613 + job_id: j5mn0k17p + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:59:10Z' + - torchscript_onnx_tflite: + inference_time: 487430.0 + throughput: 2.0515766366452617 + estimated_peak_memory_range: + min: 12533760 + max: 1153551760 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jglvy8825 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 574723.0 + throughput: 1.7399686457650032 + estimated_peak_memory_range: + min: 10604544 + max: 1167088096 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgke201vg + job_status: Passed + torchscript_onnx: + inference_time: 474562.0 + throughput: 2.107206223844303 + estimated_peak_memory_range: + min: 43212800 + max: 4978673840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: j5q6l1k7p + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:59:15Z' + - torchscript_onnx_tflite: + inference_time: 671625.0 + throughput: 1.48892611204169 + estimated_peak_memory_range: + min: 40960 + max: 106793384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgz3lyy45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 737788.0 + throughput: 1.355402907068155 + estimated_peak_memory_range: + min: 13545472 + max: 15027352 + primary_compute_unit: NPU + precision: fp16 + layer_info: + 
layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgjvry91g + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:58:14Z' + - torchscript_onnx_qnn: + inference_time: 1869513.0 + throughput: 0.534898660774223 + estimated_peak_memory_range: + min: 204800 + max: 10980384 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5mn0k7yp + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:58:27Z' + - torchscript_onnx_tflite: + inference_time: 690907.0 + throughput: 1.4473728012597933 + estimated_peak_memory_range: + min: 12775424 + max: 121292064 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp8qerkqp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 731994.0 + throughput: 1.3661314163777298 + estimated_peak_memory_range: + min: 15462400 + max: 17214648 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgke20jyg + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T22:58:34Z' + - torchscript_onnx_tflite: + inference_time: 726454.0 + throughput: 1.3765496507693535 + estimated_peak_memory_range: + min: 13012992 + max: 1196571136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jpv6lmnz5 + job_status: Passed + 
torchscript_onnx_qnn: + inference_time: 782399.0 + throughput: 1.2781202429962206 + estimated_peak_memory_range: + min: 380928 + max: 6687920 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgjvryj7g + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:58:40Z' + - torchscript_onnx_tflite: + inference_time: 671719.0 + throughput: 1.4887177525125834 + estimated_peak_memory_range: + min: 12750848 + max: 108813808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgdxd4m6p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 733997.0 + throughput: 1.362403388569708 + estimated_peak_memory_range: + min: 12824576 + max: 14803808 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j57yenqq5 + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:58:46Z' + - torchscript_onnx_tflite: + inference_time: 717157.0 + throughput: 1.3943948117357845 + estimated_peak_memory_range: + min: 200704 + max: 1158155328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp2krdz6p + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:58:52Z' + - reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + 
timestamp: '2024-12-11T22:58:58Z' + - torchscript_onnx_qnn: + inference_time: 633760.0 + throughput: 1.5778843726331735 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5welzkz5 + job_status: Passed + torchscript_onnx: + inference_time: 743453.0 + throughput: 1.3450749408503295 + estimated_peak_memory_range: + min: 53719040 + max: 53719040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jped7x175 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:22Z' +- name: SAMEncoderPart3 + universal_assets: + torchscript_onnx_tflite: mq2145z0m + torchscript_onnx: mqyv3p5rq + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 686379.0 + throughput: 1.4569210305093834 + estimated_peak_memory_range: + min: 12767232 + max: 107984512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgdxd466p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 852964.0 + throughput: 1.172382421766921 + estimated_peak_memory_range: + min: 12820480 + max: 120863208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jg9lz27mg + job_status: Passed + torchscript_onnx: + inference_time: 740438.0 + throughput: 1.3505519705903803 + estimated_peak_memory_range: + min: 9277440 + max: 64694824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jp14n10kp + job_status: 
Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:59:05Z' + - reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:59:10Z' + - torchscript_onnx_tflite: + inference_time: 440773.0 + throughput: 2.2687415063989853 + estimated_peak_memory_range: + min: 12431360 + max: 1152750752 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j56y8mmnp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 579622.0 + throughput: 1.725262326136689 + estimated_peak_memory_range: + min: 11714560 + max: 1169358576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5q6l1nep + job_status: Passed + torchscript_onnx: + inference_time: 472649.0 + throughput: 2.115734932264746 + estimated_peak_memory_range: + min: 41132032 + max: 4978714432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jglvy8ze5 + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:59:16Z' + - torchscript_onnx_tflite: + inference_time: 668273.0 + throughput: 1.4963944376025966 + estimated_peak_memory_range: + min: 12722176 + max: 119821736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j5welzz45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 731938.0 + throughput: 1.366235938016608 + 
estimated_peak_memory_range: + min: 13017088 + max: 14218888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jped7xq85 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:58:15Z' + - torchscript_onnx_qnn: + inference_time: 1874534.0 + throughput: 0.5334659173960035 + estimated_peak_memory_range: + min: 12550144 + max: 23295248 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgn6zqjv5 + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:58:27Z' + - torchscript_onnx_tflite: + inference_time: 684379.0 + throughput: 1.461178674389483 + estimated_peak_memory_range: + min: 12775424 + max: 112903232 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgke20kvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 731638.0 + throughput: 1.366796147821737 + estimated_peak_memory_range: + min: 14118912 + max: 15746592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5q6l1j7p + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T22:58:34Z' + - torchscript_onnx_tflite: + inference_time: 725387.0 + throughput: 1.3785744712822259 + estimated_peak_memory_range: + min: 12959744 + max: 1203364032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + 
layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgjvry81g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 781146.0 + throughput: 1.280170416285816 + estimated_peak_memory_range: + min: 327680 + max: 6639040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jped7xj75 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:58:40Z' + - torchscript_onnx_tflite: + inference_time: 683645.0 + throughput: 1.4627474785890338 + estimated_peak_memory_range: + min: 12816384 + max: 117069656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j57yen8n5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 735421.0 + throughput: 1.35976535888967 + estimated_peak_memory_range: + min: 13271040 + max: 14964312 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp4ly4zq5 + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:58:47Z' + - torchscript_onnx_tflite: + inference_time: 724877.0 + throughput: 1.3795443916692074 + estimated_peak_memory_range: + min: 98304 + max: 1160724768 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jpy1o2y0p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 742231.0 + throughput: 1.3472894557085329 + estimated_peak_memory_range: + min: 1404928 + max: 11716688 + primary_compute_unit: NPU + precision: fp16 + layer_info: + 
layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp0zm9y25 + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:58:53Z' + - reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:58:59Z' + - torchscript_onnx_qnn: + inference_time: 632458.0 + throughput: 1.5811326601924556 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jg9lz2rqg + job_status: Passed + torchscript_onnx: + inference_time: 741655.0 + throughput: 1.348335816518462 + estimated_peak_memory_range: + min: 53714944 + max: 53714944 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jgz3ly9z5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:22Z' +- name: SAMEncoderPart4 + universal_assets: + torchscript_onnx_tflite: mqe7xlp7m + torchscript_onnx: mn0jx8gzm + performance_metrics: + - torchscript_onnx_qnn: + inference_time: 833428.0 + throughput: 1.199863695484193 + estimated_peak_memory_range: + min: 12873728 + max: 104532576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp14n1knp + job_status: Passed + torchscript_onnx: + inference_time: 682163.0 + throughput: 1.4659252993785943 + estimated_peak_memory_range: + min: 143360 + max: 56027856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + 
layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jgdxd4wkp + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:59:05Z' + - reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:59:11Z' + - torchscript_onnx_tflite: + inference_time: 437593.0 + throughput: 2.285228511425 + estimated_peak_memory_range: + min: 11272192 + max: 1151293664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp3jz77mg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 531361.0 + throughput: 1.8819597222980233 + estimated_peak_memory_range: + min: 10772480 + max: 1172965024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jglvy8d25 + job_status: Passed + torchscript_onnx: + inference_time: 563940.0 + throughput: 1.7732382877611093 + estimated_peak_memory_range: + min: 27066368 + max: 5457277520 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jg9lze78g + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-12T07:59:21Z' + - torchscript_onnx_tflite: + inference_time: 675986.0 + throughput: 1.479320577645099 + estimated_peak_memory_range: + min: 12738560 + max: 106796656 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: 
jg9lz22mg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 731308.0 + throughput: 1.3674129094717957 + estimated_peak_memory_range: + min: 13029376 + max: 14985736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgz3ly645 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:58:15Z' + - reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:58:28Z' + - torchscript_onnx_tflite: + inference_time: 677387.0 + throughput: 1.4762609852270563 + estimated_peak_memory_range: + min: 12787712 + max: 107879336 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j5q6l1dep + job_status: Passed + torchscript_onnx_qnn: + inference_time: 740258.0 + throughput: 1.3508803687363053 + estimated_peak_memory_range: + min: 13242368 + max: 14781944 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jglvy8je5 + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T22:58:35Z' + - torchscript_onnx_tflite: + inference_time: 725343.0 + throughput: 1.378658096927936 + estimated_peak_memory_range: + min: 12947456 + max: 1205833968 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jped7xn85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 782501.0 + throughput: 1.2779536383979062 + 
estimated_peak_memory_range: + min: 368640 + max: 6645696 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgz3ly1z5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:58:41Z' + - torchscript_onnx_tflite: + inference_time: 674544.0 + throughput: 1.482482981095377 + estimated_peak_memory_range: + min: 12824576 + max: 106622600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp4ly4225 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 745540.0 + throughput: 1.3413096547468948 + estimated_peak_memory_range: + min: 13303808 + max: 14943960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jpxklrwj5 + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:58:47Z' + - torchscript_onnx_qnn: + inference_time: 738700.0 + throughput: 1.3537295248409367 + estimated_peak_memory_range: + min: 12488704 + max: 22768048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp8qerozp + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:58:53Z' + - reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:58:59Z' + - torchscript_onnx_qnn: + inference_time: 628108.0 + 
throughput: 1.5920828902035955 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp14n19kp + job_status: Passed + torchscript_onnx: + inference_time: 739450.0 + throughput: 1.352356481168436 + estimated_peak_memory_range: + min: 55521280 + max: 55521280 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: j5welzvz5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:22Z' +- name: SAMEncoderPart5 + universal_assets: + torchscript_onnx_tflite: mqyv3pw7q + torchscript_onnx: mmr360g2m + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 671428.0 + throughput: 1.4893629696706125 + estimated_peak_memory_range: + min: 12943360 + max: 112437496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp4ly4e25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 844265.0 + throughput: 1.184462224538504 + estimated_peak_memory_range: + min: 12791808 + max: 107220632 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgdxd4y6p + job_status: Passed + torchscript_onnx: + inference_time: 684639.0 + throughput: 1.4606237739889196 + estimated_peak_memory_range: + min: 192512 + max: 55788184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: j57yenzq5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + 
os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:59:05Z' + - torchscript_onnx_tflite: + inference_time: 548457.0 + throughput: 1.8232969950242226 + estimated_peak_memory_range: + min: 12283904 + max: 1149048528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp0zm9905 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:59:11Z' + - torchscript_onnx_tflite: + inference_time: 484471.0 + throughput: 2.0641070363344762 + estimated_peak_memory_range: + min: 11014144 + max: 1152444960 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgo2lww1p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 575509.0 + throughput: 1.7375922878703896 + estimated_peak_memory_range: + min: 7655424 + max: 1164727856 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j56y8mxnp + job_status: Passed + torchscript_onnx: + inference_time: 470838.0 + throughput: 2.1238727545355305 + estimated_peak_memory_range: + min: 40570880 + max: 4972644032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jp3jz73xg + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:59:17Z' + - torchscript_onnx_tflite: + inference_time: 665458.0 + throughput: 1.5027244394086479 + estimated_peak_memory_range: + min: 12734464 + max: 108696136 + primary_compute_unit: 
NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp14n11np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 720635.0 + throughput: 1.3876650454113386 + estimated_peak_memory_range: + min: 12931072 + max: 14385792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5welzk45 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:58:16Z' + - torchscript_onnx_qnn: + inference_time: 1874681.0 + throughput: 0.5334240865512586 + estimated_peak_memory_range: + min: 11890688 + max: 22612480 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp2krd2xp + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:58:28Z' + - torchscript_onnx_tflite: + inference_time: 691758.0 + throughput: 1.4455922446867258 + estimated_peak_memory_range: + min: 12824576 + max: 118427024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jglvy8q25 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 745957.0 + throughput: 1.3405598446023028 + estimated_peak_memory_range: + min: 13709312 + max: 14847136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j56y8mkvp + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: 
'2024-12-11T22:58:35Z' + - torchscript_onnx_tflite: + inference_time: 725701.0 + throughput: 1.377977982667793 + estimated_peak_memory_range: + min: 12955648 + max: 1205180512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgz3ly045 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 782525.0 + throughput: 1.277914443627999 + estimated_peak_memory_range: + min: 380928 + max: 6707824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5welzjz5 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:58:41Z' + - torchscript_onnx_tflite: + inference_time: 679545.0 + throughput: 1.4715728906842078 + estimated_peak_memory_range: + min: 12771328 + max: 110521576 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jpxklrz85 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 730129.0 + throughput: 1.3696209847848806 + estimated_peak_memory_range: + min: 13459456 + max: 15392392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5mn0kjyp + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:58:47Z' + - torchscript_onnx_qnn: + inference_time: 741020.0 + throughput: 1.3494912418018408 + estimated_peak_memory_range: + min: 1392640 + max: 11599584 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: 
jgke20zyg + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:58:54Z' + - reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:59:00Z' + - torchscript_onnx_qnn: + inference_time: 640949.0 + throughput: 1.5601865359022324 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgdxd4kkp + job_status: Passed + torchscript_onnx: + inference_time: 735548.0 + throughput: 1.3595305812808953 + estimated_peak_memory_range: + min: 53792768 + max: 53792768 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jg9lz21qg + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:59:23Z' +- name: SAMEncoderPart6 + universal_assets: + torchscript_onnx_tflite: mn7xlrkjq + torchscript_onnx: mqyv3p59q + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 677701.0 + throughput: 1.4755769874915339 + estimated_peak_memory_range: + min: 12779520 + max: 107115776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jpxklr085 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 841659.0 + throughput: 1.1881296344481553 + estimated_peak_memory_range: + min: 12902400 + max: 114549272 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 
+ job_id: j57yen1n5 + job_status: Passed + torchscript_onnx: + inference_time: 684635.0 + throughput: 1.4606323077260146 + estimated_peak_memory_range: + min: 348160 + max: 980103024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jp4ly4qq5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:59:06Z' + - torchscript_onnx_tflite: + inference_time: 536637.0 + throughput: 1.8634570482467665 + estimated_peak_memory_range: + min: 10993664 + max: 1152279456 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jp8qerrqp + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:59:11Z' + - torchscript_onnx_tflite: + inference_time: 485849.0 + throughput: 2.058252666980893 + estimated_peak_memory_range: + min: 11517952 + max: 1145940448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jpv6lmmz5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 579641.0 + throughput: 1.7252057739186841 + estimated_peak_memory_range: + min: 10674176 + max: 1168460320 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp3jz7dmg + job_status: Passed + torchscript_onnx: + inference_time: 472010.0 + throughput: 2.1185991822207155 + estimated_peak_memory_range: + min: 40701952 + max: 4975100368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + 
layers_on_cpu: 0 + total_layers: 610 + job_id: jgo2lw04p + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:59:17Z' + - torchscript_onnx_tflite: + inference_time: 656062.0 + throughput: 1.5242461840496782 + estimated_peak_memory_range: + min: 12738560 + max: 118537032 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgdxd446p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 730888.0 + throughput: 1.3681986843401452 + estimated_peak_memory_range: + min: 12947456 + max: 14557048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jg9lz2rmg + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:58:16Z' + - torchscript_onnx_qnn: + inference_time: 1868576.0 + throughput: 0.535166886441868 + estimated_peak_memory_range: + min: 4747264 + max: 15523088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jpy1o29rp + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:58:29Z' + - torchscript_onnx_tflite: + inference_time: 688443.0 + throughput: 1.452553079920923 + estimated_peak_memory_range: + min: 12759040 + max: 106572680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j56y8m0np + job_status: Passed + torchscript_onnx_qnn: + inference_time: 
736226.0 + throughput: 1.3582785720689028 + estimated_peak_memory_range: + min: 13021184 + max: 14720088 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jp3jz7yxg + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T22:58:35Z' + - torchscript_onnx_tflite: + inference_time: 725807.0 + throughput: 1.37777673679091 + estimated_peak_memory_range: + min: 5271552 + max: 1197448048 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j5welzr45 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 782131.0 + throughput: 1.2785581954941052 + estimated_peak_memory_range: + min: 397312 + max: 6679184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jg9lz26qg + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:58:42Z' + - torchscript_onnx_tflite: + inference_time: 672876.0 + throughput: 1.4861579250857513 + estimated_peak_memory_range: + min: 12820480 + max: 108542600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: j5mn0kl7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 740495.0 + throughput: 1.3504480111276915 + estimated_peak_memory_range: + min: 17268736 + max: 18567472 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: jgn6zq2v5 + job_status: Passed + reference_device_info: + name: SA8650 
(Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:58:48Z' + - torchscript_onnx_tflite: + inference_time: 726536.0 + throughput: 1.3763942874131496 + estimated_peak_memory_range: + min: 229376 + max: 1157817104 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 580 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 580 + job_id: jgke20wvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 742038.0 + throughput: 1.3476398782811663 + estimated_peak_memory_range: + min: 1609728 + max: 12063184 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j5q6l187p + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:58:54Z' + - reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:34:33Z' + timestamp: '2024-12-11T22:59:00Z' + - torchscript_onnx_qnn: + inference_time: 629137.0 + throughput: 1.5894789211252875 + estimated_peak_memory_range: + min: 12607488 + max: 12607488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 613 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 613 + job_id: j57yenmq5 + job_status: Passed + torchscript_onnx: + inference_time: 743947.0 + throughput: 1.3441817763899848 + estimated_peak_memory_range: + min: 54714368 + max: 54714368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 610 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 610 + job_id: jp14n1lkp + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: 
Snapdragon® X Elite + timestamp: '2024-12-11T22:59:23Z' diff --git a/qai_hub_models/models/sam/test.py b/qai_hub_models/models/sam/test.py index fd862267..b8afee29 100644 --- a/qai_hub_models/models/sam/test.py +++ b/qai_hub_models/models/sam/test.py @@ -2,79 +2,123 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- + +from typing import cast + import numpy as np -import pytest import torch -from qai_hub_models.models.sam.app import SAMApp +from qai_hub_models.models.sam.app import SAMApp, SAMInputImageLayout from qai_hub_models.models.sam.demo import IMAGE_ADDRESS from qai_hub_models.models.sam.demo import main as demo_main -from qai_hub_models.models.sam.model import SMALL_MODEL_TYPE, SAMQAIHMWrapper +from qai_hub_models.models.sam.model import ( + SMALL_MODEL_TYPE, + SamOnnxModel, + SamPredictor, + SAMQAIHMWrapper, + _get_weights_path, + sam_model_registry, +) from qai_hub_models.utils.asset_loaders import load_image -from qai_hub_models.utils.testing import skip_clone_repo_check_fixture # noqa: F401 +from qai_hub_models.utils.testing import assert_most_close # noqa: F401 -@pytest.fixture(scope="module") -def input_image_data() -> np.ndarray: - return np.asarray(load_image(IMAGE_ADDRESS)) +def test_e2e_numerical(): + """Verify our driver produces the correct segmentation as source PyTorch model""" + model_type = SMALL_MODEL_TYPE + + # OOTB SAM Objects + sam_without_our_edits = sam_model_registry[model_type]( + _get_weights_path(model_type) + ) + sam_predictor = SamPredictor(sam_without_our_edits) + sam_onnx_decoder = SamOnnxModel(sam_predictor.model, return_single_mask=True) + # QAIHM SAMApp + qaihm_sam = SAMQAIHMWrapper.from_pretrained(model_type) + qaihm_app = SAMApp( + qaihm_sam.sam.image_encoder.img_size, + qaihm_sam.sam.mask_threshold, + SAMInputImageLayout[qaihm_sam.sam.image_format], + qaihm_sam.encoder_splits, + 
qaihm_sam.decoder, + ) -def test_e2e_numerical( - input_image_data: np.ndarray, - monkeypatch, - skip_clone_repo_check_fixture, -): - """Verify our driver produces the correct segmentation as source PyTorch model""" - monkeypatch.setattr("builtins.input", lambda: "y") + # + # Inputs + # + input_image_data = np.asarray(load_image(IMAGE_ADDRESS)) + point_coords = torch.tensor([[[313, 167]]]) + point_labels = torch.randint(low=0, high=4, size=(1, 1), dtype=torch.float) + mask_input = torch.zeros( + [1, 1, qaihm_sam.decoder.embed_size[0] * 4, qaihm_sam.decoder.embed_size[1] * 4] + ) + has_mask_input = torch.zeros([1]) - sam_wrapper = SAMQAIHMWrapper.from_pretrained(SMALL_MODEL_TYPE) - sam_model = sam_wrapper.get_sam() - sam_predictor = sam_wrapper.SamPredictor(sam_model) - sam_decoder = sam_wrapper.SamOnnxModel( - sam_model, orig_img_size=input_image_data.shape[:2], return_single_mask=True + # Sam predictor takes coordinates in the resized image coordinate space, rather than the original + # input image coordinate space. We need to transform the coordinates to fit the resized image. + # This happens in the QAIHM SAM App, but not in the SAMPredictor provided by the sam repository. 
+ point_coords_postprocessed = qaihm_app.input_img_size_transform.apply_coords_torch( + point_coords, (input_image_data.shape[0], input_image_data.shape[1]) ) + # + # Verify encoder output + # sam_predictor.set_image(input_image_data) - # QAIHM SAMApp for segmentation - sam_app = SAMApp(sam_wrapper) - # Prepare image for segmentation - sam_app.prepare(input_image_data) - - # Ensure image embeddings match with source model - np.allclose( - sam_predictor.features.detach().numpy(), - sam_app.image_embeddings.detach().numpy(), + sam_predictor_image_embeddings = cast(torch.Tensor, sam_predictor.features) + qaihm_image_embeddings, qaihm_input_image_size = qaihm_app.predict_embeddings( + input_image_data + ) + assert_most_close( + sam_predictor_image_embeddings.numpy(), + qaihm_image_embeddings.numpy(), + 0.005, + rtol=0.001, + atol=0.001, ) # - # Verify Decoder output is correct + # Verify Decoder output + # Use embeddings from SAM predictor to make sure the inputs to both decoders are the same. # - # Create input for decoder - embed_size = sam_predictor.model.prompt_encoder.image_embedding_size - mask_input_size = [4 * x for x in embed_size] - decoder_inputs = { - "image_embeddings": sam_predictor.features.detach(), - "point_coords": torch.randint(low=0, high=500, size=(1, 2), dtype=torch.float), - "point_labels": torch.randint(low=0, high=4, size=(1,), dtype=torch.float), - "mask_input": torch.zeros(1, 1, *mask_input_size, dtype=torch.float), - "has_mask_input": torch.tensor([1], dtype=torch.float), - } - - # Perform inference for decoder models - obs_decoder_output = sam_app.generate_mask_from_points( - decoder_inputs["point_coords"], - decoder_inputs["point_labels"], + # The SAM ONNX decoder has slightly different output compared to the SAM predictor + # Our model is based on the SAM ONNX decoder, so we compare against that instead. 
+ sam_pred_masks, sam_pred_scores, _ = cast( + tuple[torch.Tensor, torch.Tensor, torch.Tensor], + sam_onnx_decoder.forward( + sam_predictor_image_embeddings, + point_coords_postprocessed, + point_labels, + mask_input, + has_mask_input, + torch.Tensor(qaihm_input_image_size), + ), ) - decoder_inputs["point_coords"] = decoder_inputs["point_coords"].unsqueeze(0) - decoder_inputs["point_labels"] = decoder_inputs["point_labels"].unsqueeze(0) - exp_decoder_output = sam_decoder(*decoder_inputs.values()) + ( + qaihm_pred_masks, + qaihm_pred_scores, + ) = qaihm_app.predict_mask_from_points_and_embeddings( + sam_predictor_image_embeddings, + qaihm_input_image_size, + point_coords, + point_labels, + return_logits=True, + ) - # Ensure segmentation upscaled mask, scores and low-res masks match with source model - for exp, obs in zip(exp_decoder_output, obs_decoder_output): - np.allclose(exp.detach().numpy(), obs.detach().numpy()) + assert_most_close( + sam_pred_masks.numpy(), qaihm_pred_masks.numpy(), 0.005, rtol=0.001, atol=0.001 + ) + assert_most_close( + sam_pred_scores.numpy(), + qaihm_pred_scores.numpy(), + 0.005, + rtol=0.001, + atol=0.001, + ) -def test_demo(skip_clone_repo_check_fixture): +def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 82cc8a8f..a5e843b9 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: SESR-M5 universal_assets: - torchscript_onnx_tflite: mm5d9v86n - torchscript_onnx: mq24765lm + torchscript_onnx_tflite: mm5ed7p4m + torchscript_onnx: mn1wzporm performance_metrics: - torchscript_onnx_tflite: - inference_time: 2290.0 - throughput: 436.68122270742356 + inference_time: 2264.0 + throughput: 441.69611307420496 estimated_peak_memory_range: - min: 28672 - max: 4383984 + min: 16384 + max: 12504504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jp1472k2p + job_id: jped7x7v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2146.0 - throughput: 465.98322460391427 + inference_time: 2137.0 + throughput: 467.94571829667757 estimated_peak_memory_range: - min: 16384 - max: 72930816 + min: 28672 + max: 7860344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp0zd4865 + job_id: jgn6zqzq5 job_status: Passed torchscript_onnx: - inference_time: 2680.0 - throughput: 373.13432835820896 + inference_time: 2722.0 + throughput: 367.37692872887584 estimated_peak_memory_range: - min: 212992 - max: 1600616 + min: 20480 + max: 2109632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jgz382qk5 + job_id: jgo2lwdkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:33:15Z' + timestamp: '2024-12-11T22:55:56Z' - torchscript_onnx_tflite: - inference_time: 1556.0 - throughput: 642.6735218508998 + inference_time: 1538.0 + throughput: 650.1950585175553 estimated_peak_memory_range: min: 16384 - max: 12893664 + max: 13278848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jgdx8nyep + job_id: jgz3lylx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1466.0 - throughput: 682.1282401091405 + inference_time: 1445.0 + throughput: 692.0415224913495 estimated_peak_memory_range: min: 208896 - max: 14066992 + max: 14144240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp8q62dxp + job_id: jprvldl7g job_status: Passed torchscript_onnx: - 
inference_time: 1942.0 - throughput: 514.9330587023687 + inference_time: 1941.0 + throughput: 515.1983513652756 estimated_peak_memory_range: min: 0 - max: 31950864 + max: 31729696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: j5we8wk65 + job_id: jpv6lm2r5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:33:16Z' + timestamp: '2024-12-11T22:55:57Z' - torchscript_onnx_tflite: - inference_time: 1718.0 - throughput: 582.0721769499418 + inference_time: 1688.0 + throughput: 592.4170616113744 estimated_peak_memory_range: min: 12288 - max: 10100256 + max: 9549280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j57yk21l5 + job_id: j5welzlm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1337.0 - throughput: 747.9431563201197 + inference_time: 1585.0 + throughput: 630.9148264984227 estimated_peak_memory_range: min: 0 - max: 10043552 + max: 10279072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgkeovw2g + job_id: jp2krdrqp job_status: Passed torchscript_onnx: inference_time: 2002.0 throughput: 499.5004995004995 estimated_peak_memory_range: min: 0 - max: 17462992 + max: 17359408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jg9lk0rlg + job_id: jgjvry3eg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:33:17Z' + timestamp: '2024-12-11T22:55:58Z' - torchscript_onnx_tflite: - inference_time: 2201.0 
- throughput: 454.3389368468878 + inference_time: 2241.0 + throughput: 446.2293618920125 estimated_peak_memory_range: min: 16384 - max: 5239552 + max: 68613352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jp4lmn6v5 + job_id: jg9lz2z8g job_status: Passed torchscript_onnx_qnn: - inference_time: 2144.0 - throughput: 466.4179104477612 + inference_time: 2227.0 + throughput: 449.034575662326 estimated_peak_memory_range: - min: 225280 - max: 1458496 + min: 229376 + max: 1393328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5q6z0x4p + job_id: jpy1o2olp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:33:05Z' + timestamp: '2024-12-11T22:55:47Z' - torchscript_onnx_tflite: - inference_time: 78598.0 - throughput: 12.722970050128502 + inference_time: 80020.0 + throughput: 12.496875781054737 estimated_peak_memory_range: - min: 6434816 - max: 17003872 + min: 6336512 + max: 19434128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jpxk39815 + job_id: jp14n1n7p job_status: Passed torchscript_onnx_qnn: - inference_time: 77493.0 - throughput: 12.904391364381299 + inference_time: 77529.0 + throughput: 12.898399308645796 estimated_peak_memory_range: - min: 315392 - max: 6014672 + min: 266240 + max: 10837856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j56yr290p + job_id: jp8qereop job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:33:07Z' + 
timestamp: '2024-12-11T22:55:49Z' - torchscript_onnx_tflite: - inference_time: 2203.0 - throughput: 453.92646391284615 + inference_time: 2216.0 + throughput: 451.2635379061372 estimated_peak_memory_range: - min: 6332416 - max: 77710352 + min: 28672 + max: 5095040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: j5mnoe1wp + job_id: jgdxd4dzp job_status: Passed torchscript_onnx_qnn: - inference_time: 2165.0 - throughput: 461.8937644341801 + inference_time: 2176.0 + throughput: 459.55882352941177 estimated_peak_memory_range: - min: 229376 - max: 1453312 + min: 225280 + max: 1588424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp3jxnllg + job_id: jgke202ng job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:33:08Z' + timestamp: '2024-12-11T22:55:50Z' - torchscript_onnx_tflite: - inference_time: 6026.0 - throughput: 165.94756057085962 + inference_time: 6099.0 + throughput: 163.96130513198884 estimated_peak_memory_range: - min: 6283264 - max: 15074176 + min: 6307840 + max: 14949232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jgn6o0dr5 + job_id: j57yene95 job_status: Passed torchscript_onnx_qnn: - inference_time: 3576.0 - throughput: 279.6420581655481 + inference_time: 3731.0 + throughput: 268.0246582685607 estimated_peak_memory_range: - min: 0 - max: 6050240 + min: 212992 + max: 6365408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgo2oz7xp + job_id: j5q6l1lop job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: 
os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:33:09Z' + timestamp: '2024-12-11T22:55:52Z' - torchscript_onnx_tflite: - inference_time: 2163.0 - throughput: 462.32085067036525 + inference_time: 2263.0 + throughput: 441.8912947414936 estimated_peak_memory_range: min: 20480 - max: 80843352 + max: 70679448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jprvo6m9g + job_id: jp4ly4y15 job_status: Passed torchscript_onnx_qnn: - inference_time: 2179.0 - throughput: 458.9261128958238 + inference_time: 2175.0 + throughput: 459.7701149425287 estimated_peak_memory_range: min: 229376 - max: 2073144 + max: 1620808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpv6eqyj5 + job_id: jglvy8wm5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:33:11Z' + timestamp: '2024-12-11T22:55:53Z' - torchscript_onnx_tflite: - inference_time: 4441.0 - throughput: 225.17451024544022 + inference_time: 4536.0 + throughput: 220.4585537918871 estimated_peak_memory_range: - min: 6307840 - max: 15477520 + min: 6328320 + max: 20066640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jp2k4xq4p + job_id: jpxklrll5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4510.0 - throughput: 221.72949002217294 + inference_time: 4471.0 + throughput: 223.66360993066428 estimated_peak_memory_range: - min: 0 - max: 5983408 + min: 212992 + max: 6442672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgjvod6xg + job_id: j56y8moyp job_status: Passed 
reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:33:12Z' + timestamp: '2024-12-11T22:55:54Z' - torchscript_onnx_tflite: - inference_time: 3661.0 - throughput: 273.1494127287626 + inference_time: 3487.0 + throughput: 286.77946659019216 estimated_peak_memory_range: min: 16384 - max: 13375008 + max: 13325888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jpy1qzk7p + job_id: j5mn0k09p job_status: Passed torchscript_onnx_qnn: - inference_time: 3219.0 - throughput: 310.65548306927616 + inference_time: 3196.0 + throughput: 312.89111389236547 estimated_peak_memory_range: min: 208896 - max: 16077040 + max: 16012576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jped8o015 + job_id: jp3jz7ong job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:33:13Z' + timestamp: '2024-12-11T22:55:55Z' - torchscript_onnx_qnn: - inference_time: 2379.0 - throughput: 420.3446826397646 + inference_time: 2356.0 + throughput: 424.44821731748726 estimated_peak_memory_range: - min: 237568 - max: 237568 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jglvo4985 + job_id: jp0zm9mn5 job_status: Passed torchscript_onnx: - inference_time: 3001.0 - throughput: 333.2222592469177 + inference_time: 3009.0 + throughput: 332.33632436025255 estimated_peak_memory_range: - min: 8916992 - max: 8916992 + min: 8863744 + max: 8863744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - 
job_id: jp147292p + job_id: jped7x6v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:33:18Z' + timestamp: '2024-12-11T22:55:59Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index cd70e7b6..7bf9a0ad 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -6,7 +6,9 @@ headline: Upscale images in real time. domain: Computer Vision use_case: Super Resolution description: SESR M5 performs efficient on-device upscaling of images. -tags: [quantized] +tags: + - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2103.09404 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 88d7a95b..bd0626eb 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -47,15 +47,15 @@ aggregated: models: - name: SESR-M5-Quantized universal_assets: - torchscript_onnx_tflite: mn7lk4pjq - torchscript_onnx: mnjxkvkxq + torchscript_onnx_tflite: mnz1vzdpq + torchscript_onnx: mn4l1e7vq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1356.0 - throughput: 737.4631268436578 + inference_time: 1360.0 + throughput: 735.2941176470588 estimated_peak_memory_range: min: 16384 - max: 7018016 + max: 19770136 primary_compute_unit: NPU precision: int8 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jgkeol0yg + job_id: jpv6lo675 job_status: Passed torchscript_onnx_qnn: - inference_time: 980.0 - throughput: 1020.4081632653061 + inference_time: 977.0 + 
throughput: 1023.5414534288639 estimated_peak_memory_range: - min: 16384 - max: 58660000 + min: 28672 + max: 10280712 primary_compute_unit: NPU precision: int8 layer_info: @@ -78,14 +78,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j57ykjnq5 + job_id: jp2kr6xxp job_status: Passed torchscript_onnx: - inference_time: 2724.0 - throughput: 367.1071953010279 + inference_time: 2729.0 + throughput: 366.43459142543054 estimated_peak_memory_range: min: 69632 - max: 2777920 + max: 2366440 primary_compute_unit: NPU precision: int8 layer_info: @@ -93,7 +93,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 81 - job_id: j56yr30vp + job_id: jped71o75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -102,13 +102,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:45:25Z' + timestamp: '2024-12-12T01:14:30Z' - torchscript_onnx_tflite: - inference_time: 1125.0 - throughput: 888.8888888888889 + inference_time: 1109.0 + throughput: 901.7132551848512 estimated_peak_memory_range: min: 16384 - max: 15007792 + max: 15522128 primary_compute_unit: NPU precision: int8 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: j5q6z717p + job_id: jgjvrmv7g job_status: Passed torchscript_onnx_qnn: - inference_time: 712.0 - throughput: 1404.4943820224719 + inference_time: 705.0 + throughput: 1418.4397163120568 estimated_peak_memory_range: - min: 61440 - max: 14359600 + min: 0 + max: 14640688 primary_compute_unit: NPU precision: int8 layer_info: @@ -131,14 +131,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp4lmx4q5 + job_id: jpy1owzrp job_status: Passed torchscript_onnx: - inference_time: 1958.0 - throughput: 510.7252298263534 + inference_time: 1950.0 + throughput: 512.8205128205128 estimated_peak_memory_range: min: 0 - max: 78664336 + max: 81606528 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-146,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 81 - job_id: jp3jx4rxg + job_id: jgz3l92z5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -155,13 +155,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:45:28Z' + timestamp: '2024-12-12T01:14:33Z' - torchscript_onnx_tflite: - inference_time: 1025.0 - throughput: 975.609756097561 + inference_time: 1539.0 + throughput: 649.772579597141 estimated_peak_memory_range: min: 12288 - max: 11587392 + max: 11542496 primary_compute_unit: NPU precision: int8 layer_info: @@ -169,14 +169,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: j56yr3mvp + job_id: jped71d75 job_status: Passed torchscript_onnx_qnn: - inference_time: 615.0 - throughput: 1626.0162601626016 + inference_time: 733.0 + throughput: 1364.256480218281 estimated_peak_memory_range: - min: 65536 - max: 12361408 + min: 0 + max: 11095872 primary_compute_unit: NPU precision: int8 layer_info: @@ -184,14 +184,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5mnowkyp + job_id: jp0zmq425 job_status: Passed torchscript_onnx: - inference_time: 1874.0 - throughput: 533.6179295624333 + inference_time: 1867.0 + throughput: 535.6186395286556 estimated_peak_memory_range: - min: 0 - max: 57941184 + min: 61440 + max: 57940400 primary_compute_unit: NPU precision: int8 layer_info: @@ -199,7 +199,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 8 total_layers: 81 - job_id: jgo2o194p + job_id: j5welvwz5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -208,13 +208,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:45:31Z' + timestamp: '2024-12-12T01:14:35Z' - torchscript_onnx_tflite: - inference_time: 3795.0 - throughput: 263.5046113306983 + inference_time: 5357.0 + throughput: 186.67164457718872 estimated_peak_memory_range: min: 1585152 - max: 16968304 
+ max: 20511376 primary_compute_unit: NPU precision: int8 layer_info: @@ -222,14 +222,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp3jx47xg + job_id: jgz3l93z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2988.0 - throughput: 334.67202141900935 + inference_time: 3114.0 + throughput: 321.13037893384717 estimated_peak_memory_range: - min: 65536 - max: 7774656 + min: 36864 + max: 12185216 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,7 +237,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgn6o9wv5 + job_id: jp8qe92zp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -246,13 +246,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:44:59Z' + timestamp: '2024-12-12T01:14:03Z' - torchscript_onnx_tflite: - inference_time: 21370.0 - throughput: 46.79457182966776 + inference_time: 19181.0 + throughput: 52.13492518638236 estimated_peak_memory_range: - min: 1785856 - max: 11847280 + min: 1708032 + max: 4141680 primary_compute_unit: NPU precision: int8 layer_info: @@ -260,7 +260,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jgo2o1w4p + job_id: j5welvez5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -269,13 +269,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:44:29Z' + timestamp: '2024-12-12T01:13:33Z' - torchscript_onnx_tflite: - inference_time: 1364.0 - throughput: 733.1378299120234 + inference_time: 1357.0 + throughput: 736.9196757553427 estimated_peak_memory_range: min: 16384 - max: 69261360 + max: 69745224 primary_compute_unit: NPU precision: int8 layer_info: @@ -283,14 +283,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jpv6e1m75 + job_id: jg9lz1lqg job_status: Passed torchscript_onnx_qnn: - inference_time: 697.0 - throughput: 1434.7202295552368 + inference_time: 690.0 + throughput: 
1449.2753623188405 estimated_peak_memory_range: - min: 86016 - max: 1457960 + min: 0 + max: 1282112 primary_compute_unit: NPU precision: int8 layer_info: @@ -298,7 +298,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jprvo47vg + job_id: jgke2nvyg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -307,13 +307,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:45:03Z' + timestamp: '2024-12-12T01:14:07Z' - torchscript_onnx_tflite: - inference_time: 14621.0 - throughput: 68.39477463921756 + inference_time: 13031.0 + throughput: 76.74008134448623 estimated_peak_memory_range: - min: 1622016 - max: 14084240 + min: 1679360 + max: 15771312 primary_compute_unit: NPU precision: int8 layer_info: @@ -321,14 +321,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jgjvo0y7g + job_id: jgdxd9xkp job_status: Passed torchscript_onnx_qnn: - inference_time: 10962.0 - throughput: 91.22422915526364 + inference_time: 10992.0 + throughput: 90.97525473071325 estimated_peak_memory_range: - min: 28672 - max: 5633952 + min: 12288 + max: 10473664 primary_compute_unit: NPU precision: int8 layer_info: @@ -336,7 +336,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jpy1q4yrp + job_id: jglvyz4e5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -345,28 +345,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:45:08Z' - - torchscript_onnx_tflite: - inference_time: 1363.0 - throughput: 733.6757153338225 - estimated_peak_memory_range: - min: 24576 - max: 3750208 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 24 - layers_on_gpu: 0 - layers_on_cpu: 3 - total_layers: 27 - job_id: jped8rx75 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 687.0 - throughput: 1455.604075691412 + timestamp: '2024-12-12T01:14:13Z' + - torchscript_onnx_qnn: + inference_time: 689.0 
+ throughput: 1451.3788098693758 estimated_peak_memory_range: - min: 126976 - max: 1802536 + min: 16384 + max: 1608016 primary_compute_unit: NPU precision: int8 layer_info: @@ -374,7 +359,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp0zd1x25 + job_id: j56y8j2vp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -383,13 +368,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:45:11Z' + timestamp: '2024-12-12T01:14:16Z' - torchscript_onnx_tflite: - inference_time: 2534.0 - throughput: 394.6329913180742 + inference_time: 2492.0 + throughput: 401.2841091492777 estimated_peak_memory_range: min: 16384 - max: 10267808 + max: 9798688 primary_compute_unit: NPU precision: int8 layer_info: @@ -397,14 +382,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jgz38xyz5 + job_id: jp4lyolq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2132.0 - throughput: 469.04315196998124 + inference_time: 1608.0 + throughput: 621.8905472636816 estimated_peak_memory_range: min: 12288 - max: 5904112 + max: 5976784 primary_compute_unit: NPU precision: int8 layer_info: @@ -412,7 +397,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp8q63kzp + job_id: jp3jz3nxg job_status: Passed reference_device_info: name: SA8295P ADP @@ -421,13 +406,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:45:14Z' + timestamp: '2024-12-12T01:14:19Z' - torchscript_onnx_tflite: - inference_time: 1353.0 - throughput: 739.0983000739099 + inference_time: 1365.0 + throughput: 732.6007326007326 estimated_peak_memory_range: - min: 16384 - max: 17369704 + min: 815104 + max: 8001864 primary_compute_unit: NPU precision: int8 layer_info: @@ -435,14 +420,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: j5we8dzz5 + job_id: jpxkljkj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 687.0 - 
throughput: 1455.604075691412 + inference_time: 770.0 + throughput: 1298.7012987012988 estimated_peak_memory_range: - min: 81920 - max: 1356984 + min: 114688 + max: 1418760 primary_compute_unit: NPU precision: int8 layer_info: @@ -450,7 +435,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jgkeolkyg + job_id: jgo2l0z4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -459,13 +444,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:45:17Z' + timestamp: '2024-12-12T01:14:21Z' - torchscript_onnx_tflite: - inference_time: 2438.0 - throughput: 410.17227235438884 + inference_time: 2473.0 + throughput: 404.36716538617065 estimated_peak_memory_range: - min: 217088 - max: 10436064 + min: 671744 + max: 16314128 primary_compute_unit: NPU precision: int8 layer_info: @@ -473,14 +458,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jg9lk32qg + job_id: j5mn02nyp job_status: Passed torchscript_onnx_qnn: - inference_time: 1297.0 - throughput: 771.0100231303007 + inference_time: 1321.0 + throughput: 757.002271006813 estimated_peak_memory_range: - min: 20480 - max: 5796752 + min: 16384 + max: 5976672 primary_compute_unit: NPU precision: int8 layer_info: @@ -488,7 +473,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j5q6z7d7p + job_id: jpv6loq75 job_status: Passed reference_device_info: name: SA8775P ADP @@ -497,13 +482,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:45:20Z' + timestamp: '2024-12-12T01:14:24Z' - torchscript_onnx_tflite: - inference_time: 2490.0 - throughput: 401.60642570281124 + inference_time: 2572.0 + throughput: 388.8024883359254 estimated_peak_memory_range: min: 16384 - max: 13491984 + max: 16596560 primary_compute_unit: NPU precision: int8 layer_info: @@ -511,14 +496,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 27 - job_id: jp147d1kp + job_id: 
jgn6zy0v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1124.0 - throughput: 889.6797153024911 + inference_time: 1120.0 + throughput: 892.8571428571429 estimated_peak_memory_range: - min: 65536 - max: 16241728 + min: 61440 + max: 18123376 primary_compute_unit: NPU precision: int8 layer_info: @@ -526,7 +511,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jglvo0qe5 + job_id: jgjvrmd7g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -535,13 +520,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:45:22Z' + timestamp: '2024-12-12T01:14:27Z' - torchscript_onnx_qnn: - inference_time: 1226.0 - throughput: 815.6606851549756 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: - min: 126976 - max: 126976 + min: 176128 + max: 176128 primary_compute_unit: NPU precision: int8 layer_info: @@ -549,22 +534,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jp2k47zxp - job_status: Passed - torchscript_onnx: - inference_time: 3152.0 - throughput: 317.25888324873097 - estimated_peak_memory_range: - min: 2297856 - max: 2297856 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 73 - layers_on_gpu: 0 - layers_on_cpu: 8 - total_layers: 81 - job_id: jpv6e1n75 + job_id: j5q6lk07p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -573,4 +543,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:45:34Z' + timestamp: '2024-12-12T01:14:38Z' diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 0316e8b9..3f2f821a 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Shufflenet-v2 universal_assets: - torchscript_onnx_tflite: mmx71jorq - torchscript_onnx: mq2476vlm + 
torchscript_onnx_tflite: mm5ed7o4m + torchscript_onnx: mqyv3p9vq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1205.0 - throughput: 829.8755186721992 + inference_time: 1208.0 + throughput: 827.8145695364238 estimated_peak_memory_range: - min: 28672 - max: 90102760 + min: 16384 + max: 15990632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgjvody8g + job_id: j5q6l1vop job_status: Passed torchscript_onnx_qnn: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 774.0 + throughput: 1291.9896640826873 estimated_peak_memory_range: - min: 622592 - max: 76588104 + min: 16384 + max: 5277984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jgn6o0wm5 + job_id: jg9lz2m8g job_status: Passed torchscript_onnx: - inference_time: 1125.0 - throughput: 888.8888888888889 + inference_time: 1117.0 + throughput: 895.2551477170994 estimated_peak_memory_range: - min: 20480 - max: 4506016 + min: 618496 + max: 2182880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jgjvod88g + job_id: jp8qerxop job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:32:30Z' + timestamp: '2024-12-11T22:55:13Z' - torchscript_onnx_tflite: - inference_time: 792.0 - throughput: 1262.6262626262626 + inference_time: 791.0 + throughput: 1264.2225031605562 estimated_peak_memory_range: - min: 12288 - max: 11497568 + min: 16384 + max: 12723312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jped8ox05 + job_id: jglvy8xm5 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 512.0 - throughput: 1953.125 + inference_time: 514.0 + throughput: 1945.5252918287938 estimated_peak_memory_range: min: 0 - max: 12699520 + max: 17539328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jprvo67eg + job_id: jp14n1j7p job_status: Passed torchscript_onnx: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 731.0 + throughput: 1367.9890560875513 estimated_peak_memory_range: min: 0 - max: 43435712 + max: 42890960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jped8on05 + job_id: jgke204ng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:32:31Z' + timestamp: '2024-12-11T22:55:14Z' - torchscript_onnx_tflite: - inference_time: 816.0 - throughput: 1225.4901960784314 + inference_time: 666.0 + throughput: 1501.5015015015015 estimated_peak_memory_range: - min: 12288 - max: 11375376 + min: 847872 + max: 11679728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgz382y65 + job_id: j56y8m7yp job_status: Passed torchscript_onnx_qnn: - inference_time: 500.0 - throughput: 2000.0 + inference_time: 413.0 + throughput: 2421.3075060532688 estimated_peak_memory_range: min: 0 - max: 10699904 + max: 14125472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp2k4xzmp + job_id: jgdxd43zp job_status: Passed torchscript_onnx: inference_time: 787.0 throughput: 1270.6480304955528 estimated_peak_memory_range: min: 0 - max: 24210688 + max: 24399296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jgz382065 + job_id: j5q6l1yop job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:32:32Z' + timestamp: '2024-12-11T22:55:15Z' - torchscript_onnx_tflite: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1210.0 + throughput: 826.4462809917355 estimated_peak_memory_range: - min: 12288 - max: 28300656 + min: 20480 + max: 15889392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j5we8wzj5 + job_id: jp3jz79ng job_status: Passed torchscript_onnx_qnn: - inference_time: 738.0 - throughput: 1355.0135501355014 + inference_time: 730.0 + throughput: 1369.86301369863 estimated_peak_memory_range: - min: 626688 - max: 1941848 + min: 647168 + max: 2129160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpy1qzy4p + job_id: jp4ly4115 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:32:21Z' + timestamp: '2024-12-11T22:55:04Z' - torchscript_onnx_tflite: - inference_time: 5091.0 - throughput: 196.42506383814575 + inference_time: 5107.0 + throughput: 195.8096729978461 estimated_peak_memory_range: - min: 20480 - max: 11109984 + min: 16384 + max: 14193184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jg9lk02vg + job_id: jgo2lwrkp job_status: Passed torchscript_onnx_qnn: - inference_time: 4553.0 - throughput: 219.63540522732265 + inference_time: 4549.0 + throughput: 219.82853374367994 estimated_peak_memory_range: - min: 552960 - 
max: 6385264 + min: 569344 + max: 11278224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp8q62k8p + job_id: j5mn0km9p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:32:23Z' + timestamp: '2024-12-11T22:55:07Z' - torchscript_onnx_tflite: - inference_time: 1213.0 - throughput: 824.4023083264633 + inference_time: 1209.0 + throughput: 827.129859387924 estimated_peak_memory_range: min: 16384 - max: 15811416 + max: 5535448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jgdx8n4lp + job_id: jpv6lmdr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 643072 - max: 2301464 + min: 634880 + max: 2186264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j5q6z0dmp + job_id: jgn6zqnq5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:32:25Z' + timestamp: '2024-12-11T22:55:08Z' - torchscript_onnx_tflite: - inference_time: 1766.0 - throughput: 566.2514156285391 + inference_time: 1759.0 + throughput: 568.5048322910744 estimated_peak_memory_range: min: 16384 - max: 10168960 + max: 9740480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j57yk28r5 + job_id: jgjvry7eg job_status: Passed torchscript_onnx_qnn: - inference_time: 1323.0 - throughput: 755.8578987150415 + inference_time: 1334.0 + throughput: 
749.6251874062968 estimated_peak_memory_range: min: 618496 - max: 6607664 + max: 6737440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jglvo4ql5 + job_id: jprvld07g job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:32:26Z' + timestamp: '2024-12-11T22:55:09Z' - torchscript_onnx_tflite: - inference_time: 1236.0 - throughput: 809.0614886731391 + inference_time: 1212.0 + throughput: 825.0825082508251 estimated_peak_memory_range: - min: 32768 - max: 15862928 + min: 16384 + max: 26279800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jp4lmn2l5 + job_id: jped7xzv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 742.0 - throughput: 1347.7088948787061 + inference_time: 739.0 + throughput: 1353.1799729364006 estimated_peak_memory_range: - min: 638976 - max: 1964224 + min: 2461696 + max: 3815720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j56yr207p + job_id: jp2krdwqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:32:27Z' + timestamp: '2024-12-11T22:55:10Z' - torchscript_onnx_tflite: - inference_time: 1816.0 - throughput: 550.6607929515418 + inference_time: 1817.0 + throughput: 550.357732526142 estimated_peak_memory_range: min: 16384 - max: 9056176 + max: 15323536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jpxk39z95 + job_id: jgz3lymx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1398.0 - throughput: 
715.307582260372 + inference_time: 1400.0 + throughput: 714.2857142857143 estimated_peak_memory_range: - min: 622592 - max: 6588624 + min: 618496 + max: 6857936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp3jxnrzg + job_id: jpy1o2xlp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:32:28Z' + timestamp: '2024-12-11T22:55:11Z' - torchscript_onnx_tflite: - inference_time: 1322.0 - throughput: 756.4296520423601 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: - min: 36864 - max: 15617008 + min: 16384 + max: 19585344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: j5mnoelqp + job_id: j5welz7m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 892.0 - throughput: 1121.0762331838564 + inference_time: 870.0 + throughput: 1149.4252873563219 estimated_peak_memory_range: - min: 294912 - max: 14648608 + min: 618496 + max: 15425040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jpv6eqnm5 + job_id: jp0zm9jn5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:32:29Z' + timestamp: '2024-12-11T22:55:12Z' - torchscript_onnx_qnn: - inference_time: 907.0 - throughput: 1102.5358324145534 + inference_time: 892.0 + throughput: 1121.0762331838564 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jp0zd4xe5 + job_id: jpxklr4l5 job_status: Passed torchscript_onnx: - inference_time: 1146.0 - throughput: 
872.6003490401396 + inference_time: 1112.0 + throughput: 899.2805755395683 estimated_peak_memory_range: - min: 5013504 - max: 5013504 + min: 4141056 + max: 4141056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j5we8wrj5 + job_id: jglvy8ym5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:32:33Z' + timestamp: '2024-12-11T22:55:16Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml index a09ef4b1..4c740b3e 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml @@ -10,6 +10,7 @@ description: ShufflenetV2 is a machine learning model that can classify images f use_case: Image Classification tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1807.11164 research_paper_title: 'ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 46cb6ed5..1ead050b 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: Shufflenet-v2Quantized universal_assets: - torchscript_onnx_tflite: mnzv4ozxq - torchscript_onnx: mq3e1j9lm + torchscript_onnx_tflite: mqkvk6rkm + torchscript_onnx: mn1wzov4m performance_metrics: - torchscript_onnx_tflite: - inference_time: 631.0 - throughput: 1584.7860538827258 + inference_time: 626.0 + throughput: 1597.444089456869 estimated_peak_memory_range: - min: 16384 - max: 8395208 + min: 12288 + max: 4518248 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 
+66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: j57ykjeq5 + job_id: jglvyzm25 job_status: Passed torchscript_onnx_qnn: - inference_time: 593.0 - throughput: 1686.3406408094436 + inference_time: 595.0 + throughput: 1680.672268907563 estimated_peak_memory_range: - min: 176128 - max: 8621400 + min: 16384 + max: 81768744 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: j56yr3ovp + job_id: j5welv6z5 job_status: Passed torchscript_onnx: - inference_time: 14988.0 - throughput: 66.72004270082734 + inference_time: 15283.0 + throughput: 65.43217954590068 estimated_peak_memory_range: - min: 2088960 - max: 66611336 + min: 2101248 + max: 5731664 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 56 total_layers: 453 - job_id: jpxk370j5 + job_id: jp0zmqz25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:43:52Z' + timestamp: '2024-12-12T01:12:52Z' - torchscript_onnx_tflite: - inference_time: 437.0 - throughput: 2288.329519450801 + inference_time: 446.0 + throughput: 2242.152466367713 estimated_peak_memory_range: min: 12288 - max: 14763216 + max: 17011936 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jpxk37lj5 + job_id: j56y8j4np job_status: Passed torchscript_onnx_qnn: - inference_time: 426.0 - throughput: 2347.417840375587 + inference_time: 429.0 + throughput: 2331.002331002331 estimated_peak_memory_range: - min: 0 - max: 15649728 + min: 159744 + max: 13811920 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jp3jx4oxg + job_id: jg9lz1nqg job_status: Passed 
torchscript_onnx: - inference_time: 12068.0 - throughput: 82.86377195889956 + inference_time: 12166.0 + throughput: 82.19628472793029 estimated_peak_memory_range: - min: 2101248 - max: 486744864 + min: 2084864 + max: 491212624 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 56 total_layers: 453 - job_id: j5mnow9yp + job_id: jp8qe9qzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:43:53Z' + timestamp: '2024-12-12T01:12:54Z' - torchscript_onnx_tflite: - inference_time: 482.0 - throughput: 2074.688796680498 + inference_time: 480.0 + throughput: 2083.3333333333335 estimated_peak_memory_range: min: 8192 - max: 11021088 + max: 11750640 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: j5mnow0yp + job_id: jp3jz30mg job_status: Passed torchscript_onnx_qnn: - inference_time: 463.0 - throughput: 2159.827213822894 + inference_time: 460.0 + throughput: 2173.913043478261 estimated_peak_memory_range: min: 0 - max: 11147536 + max: 9999968 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgo2o1d4p + job_id: jp14nlzkp job_status: Passed torchscript_onnx: - inference_time: 12977.0 - throughput: 77.05941280727441 + inference_time: 12941.0 + throughput: 77.27378100610463 estimated_peak_memory_range: - min: 2031616 - max: 440865584 + min: 2113536 + max: 439833024 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 56 total_layers: 453 - job_id: jgn6o9qv5 + job_id: jgke2neyg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
Snapdragon® 8 Elite - timestamp: '2024-11-26T03:43:55Z' + timestamp: '2024-12-12T01:12:56Z' - torchscript_onnx_tflite: - inference_time: 911.0 - throughput: 1097.694840834248 + inference_time: 931.0 + throughput: 1074.1138560687432 estimated_peak_memory_range: min: 12288 - max: 13226016 + max: 13299328 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jgn6o91v5 + job_id: jgo2l061p job_status: Passed torchscript_onnx_qnn: - inference_time: 1119.0 - throughput: 893.6550491510277 + inference_time: 1128.0 + throughput: 886.5248226950355 estimated_peak_memory_range: min: 12288 - max: 8109024 + max: 7477984 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jpv6e1275 + job_id: jgdxd91kp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:43:34Z' + timestamp: '2024-12-12T01:12:36Z' - torchscript_onnx_tflite: - inference_time: 9770.0 - throughput: 102.35414534288638 + inference_time: 10220.0 + throughput: 97.84735812133073 estimated_peak_memory_range: - min: 57344 - max: 5606800 + min: 32768 + max: 5507184 primary_compute_unit: CPU precision: fp32 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 10 layers_on_cpu: 166 total_layers: 220 - job_id: jprvo4xvg + job_id: jpv6lokz5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:43:14Z' + timestamp: '2024-12-12T01:12:15Z' - torchscript_onnx_tflite: - inference_time: 632.0 - throughput: 1582.2784810126582 + inference_time: 631.0 + throughput: 1584.7860538827258 estimated_peak_memory_range: min: 12288 - max: 8504936 + max: 4239712 primary_compute_unit: NPU precision: int8 
layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jp2k47oxp + job_id: jgjvrmn1g job_status: Passed torchscript_onnx_qnn: - inference_time: 539.0 - throughput: 1855.287569573284 + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 200704 - max: 1388200 + min: 188416 + max: 1665376 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgjvo037g + job_id: j57yewrq5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:43:36Z' + timestamp: '2024-12-12T01:12:37Z' - torchscript_onnx_tflite: - inference_time: 1605.0 - throughput: 623.0529595015577 + inference_time: 1612.0 + throughput: 620.3473945409429 estimated_peak_memory_range: - min: 12288 - max: 11951824 + min: 16384 + max: 14167008 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jpy1q48rp + job_id: jped71m85 job_status: Passed torchscript_onnx_qnn: - inference_time: 1732.0 - throughput: 577.3672055427252 + inference_time: 1725.0 + throughput: 579.7101449275362 estimated_peak_memory_range: - min: 0 - max: 5850320 + min: 94208 + max: 10631584 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgz38xzz5 + job_id: jpxkljoj5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:43:40Z' + timestamp: '2024-12-12T01:12:41Z' - torchscript_onnx_tflite: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 632.0 + throughput: 1582.2784810126582 estimated_peak_memory_range: min: 12288 - max: 
62698896 + max: 8694592 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jp0zd1o25 + job_id: jgz3l9d45 job_status: Passed torchscript_onnx_qnn: - inference_time: 535.0 - throughput: 1869.1588785046729 + inference_time: 537.0 + throughput: 1862.1973929236499 estimated_peak_memory_range: min: 184320 - max: 1579432 + max: 1503664 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: j5we8dyz5 + job_id: j5mn02xyp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:43:42Z' + timestamp: '2024-12-12T01:12:43Z' - torchscript_onnx_tflite: - inference_time: 994.0 - throughput: 1006.0362173038229 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 12288 - max: 10320624 + min: 16384 + max: 10921072 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jp8q63jzp + job_id: j5welv645 job_status: Passed torchscript_onnx_qnn: - inference_time: 996.0 - throughput: 1004.0160642570281 + inference_time: 1672.0 + throughput: 598.0861244019138 estimated_peak_memory_range: - min: 163840 - max: 6129456 + min: 0 + max: 6017136 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jg9lk3oqg + job_id: jgn6zy6v5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:43:44Z' + timestamp: '2024-12-12T01:12:45Z' - torchscript_onnx_tflite: - inference_time: 631.0 - throughput: 1584.7860538827258 + inference_time: 627.0 + throughput: 
1594.896331738437 estimated_peak_memory_range: - min: 12288 - max: 8541480 + min: 16384 + max: 8561016 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jgkeol6yg + job_id: jg9lz1nmg job_status: Passed torchscript_onnx_qnn: - inference_time: 538.0 - throughput: 1858.736059479554 + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 180224 - max: 1467096 + min: 184320 + max: 1771200 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jgdx8r6kp + job_id: jprvlqvvg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:43:46Z' + timestamp: '2024-12-12T01:12:47Z' - torchscript_onnx_tflite: - inference_time: 1037.0 - throughput: 964.3201542912246 + inference_time: 1032.0 + throughput: 968.9922480620155 estimated_peak_memory_range: - min: 16384 - max: 9875200 + min: 20480 + max: 14820208 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: j5q6z747p + job_id: jp14nlznp job_status: Passed torchscript_onnx_qnn: - inference_time: 1037.0 - throughput: 964.3201542912246 + inference_time: 1065.0 + throughput: 938.9671361502348 estimated_peak_memory_range: - min: 172032 - max: 6047296 + min: 163840 + max: 6216160 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: j57ykjoq5 + job_id: jp2kr6kxp job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:43:48Z' + timestamp: '2024-12-12T01:12:48Z' - torchscript_onnx_tflite: - 
inference_time: 672.0 - throughput: 1488.095238095238 + inference_time: 675.0 + throughput: 1481.4814814814815 estimated_peak_memory_range: - min: 16384 - max: 15798896 + min: 12288 + max: 21761264 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 220 - job_id: jglvo0we5 + job_id: jgdxd916p job_status: Passed torchscript_onnx_qnn: - inference_time: 637.0 - throughput: 1569.8587127158555 + inference_time: 630.0 + throughput: 1587.3015873015872 estimated_peak_memory_range: - min: 163840 - max: 17043680 + min: 159744 + max: 17586816 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jp4lmxeq5 + job_id: jpy1ow1rp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:43:50Z' + timestamp: '2024-12-12T01:12:51Z' - torchscript_onnx_qnn: - inference_time: 978.0 - throughput: 1022.4948875255624 + inference_time: 668.0 + throughput: 1497.005988023952 estimated_peak_memory_range: - min: 454656 - max: 454656 + min: 516096 + max: 516096 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 160 - job_id: jped8r675 + job_id: jp4lyorq5 job_status: Passed torchscript_onnx: - inference_time: 18268.0 - throughput: 54.74052988832932 + inference_time: 18138.0 + throughput: 55.132870217223505 estimated_peak_memory_range: - min: 10547200 - max: 10547200 + min: 10473472 + max: 10473472 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 56 total_layers: 453 - job_id: jprvo4dvg + job_id: j5q6lk67p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: 
Snapdragon® X Elite - timestamp: '2024-11-26T03:43:57Z' + timestamp: '2024-12-12T01:12:57Z' diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index e7d8bfe7..0793e988 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: SINet universal_assets: - torchscript_onnx_tflite: mqpz0k5vn - torchscript_onnx: mnjxw8gkq + torchscript_onnx_tflite: mq8dk943m + torchscript_onnx: mm6kv2g4n performance_metrics: - torchscript_onnx_tflite: - inference_time: 1763.0 - throughput: 567.2149744753261 + inference_time: 1766.0 + throughput: 566.2514156285391 estimated_peak_memory_range: - min: 16384 - max: 23069000 + min: 28672 + max: 5735952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jgkeov6og + job_id: jprvldn7g job_status: Passed torchscript_onnx_qnn: - inference_time: 1188.0 - throughput: 841.7508417508418 + inference_time: 1185.0 + throughput: 843.8818565400844 estimated_peak_memory_range: - min: 630784 - max: 5843840 + min: 2109440 + max: 73947384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jg9lk0ovg + job_id: jgo2lw8kp job_status: Passed torchscript_onnx: - inference_time: 2259.0 - throughput: 442.67374944665784 + inference_time: 2287.0 + throughput: 437.25404459991256 estimated_peak_memory_range: - min: 286720 - max: 2251280 + min: 479232 + max: 2588640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jp0zd49e5 + job_id: jpxklrxl5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:31:46Z' + timestamp: '2024-12-11T22:54:29Z' - 
torchscript_onnx_tflite: inference_time: 1146.0 throughput: 872.6003490401396 estimated_peak_memory_range: - min: 16384 - max: 15490800 + min: 12288 + max: 14494000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j5q6z04mp + job_id: jp2krdvqp job_status: Passed torchscript_onnx_qnn: inference_time: 798.0 throughput: 1253.1328320802006 estimated_peak_memory_range: - min: 618496 - max: 15896416 + min: 0 + max: 16494016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp1472olp + job_id: jpv6lm7r5 job_status: Passed torchscript_onnx: - inference_time: 1515.0 - throughput: 660.0660066006601 + inference_time: 1497.0 + throughput: 668.002672010688 estimated_peak_memory_range: min: 0 - max: 37941680 + max: 38105920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jp8q62r8p + job_id: j5mn0k89p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:31:47Z' + timestamp: '2024-12-11T22:54:30Z' - torchscript_onnx_tflite: - inference_time: 1174.0 - throughput: 851.7887563884157 + inference_time: 945.0 + throughput: 1058.2010582010582 estimated_peak_memory_range: - min: 12288 - max: 12562624 + min: 8192 + max: 17322432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jglvo4wl5 + job_id: jpy1o27lp job_status: Passed torchscript_onnx_qnn: - inference_time: 744.0 - throughput: 1344.0860215053763 + inference_time: 623.0 + throughput: 1605.1364365971108 estimated_peak_memory_range: min: 0 - max: 12318176 + max: 16268208 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgdx8n6lp + job_id: jgjvryqeg job_status: Passed torchscript_onnx: - inference_time: 1513.0 - throughput: 660.9385327164574 + inference_time: 1538.0 + throughput: 650.1950585175553 estimated_peak_memory_range: - min: 286720 - max: 26731984 + min: 0 + max: 26127888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jgkeov0og + job_id: jgn6zqkq5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:31:48Z' + timestamp: '2024-12-11T22:54:31Z' - torchscript_onnx_tflite: - inference_time: 1754.0 - throughput: 570.1254275940707 + inference_time: 1759.0 + throughput: 568.5048322910744 estimated_peak_memory_range: - min: 24576 - max: 5582384 + min: 151552 + max: 5976608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j56yr2o7p + job_id: jp0zm9vn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1158.0 - throughput: 863.5578583765113 + inference_time: 1159.0 + throughput: 862.8127696289905 estimated_peak_memory_range: - min: 634880 - max: 1834944 + min: 643072 + max: 1891472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j57yk2nr5 + job_id: jped7xyv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:31:36Z' + timestamp: '2024-12-11T22:54:20Z' - torchscript_onnx_tflite: - inference_time: 9031.0 - throughput: 110.7297087808659 + inference_time: 9037.0 + throughput: 110.65619121389842 
estimated_peak_memory_range: - min: 438272 - max: 13094688 + min: 16384 + max: 15300560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jp3jxnozg + job_id: jp8qer4op job_status: Passed torchscript_onnx_qnn: - inference_time: 8165.0 - throughput: 122.4739742804654 + inference_time: 8089.0 + throughput: 123.62467548522685 estimated_peak_memory_range: - min: 663552 - max: 6143872 + min: 618496 + max: 11313088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpxk39r95 + job_id: j5welz4m5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:31:39Z' + timestamp: '2024-12-11T22:54:22Z' - torchscript_onnx_tflite: - inference_time: 1771.0 - throughput: 564.652738565782 + inference_time: 1761.0 + throughput: 567.8591709256104 estimated_peak_memory_range: - min: 20480 - max: 5851480 + min: 16384 + max: 25428632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jgo2ozddp + job_id: jgke209ng job_status: Passed torchscript_onnx_qnn: - inference_time: 1187.0 - throughput: 842.4599831508003 + inference_time: 1226.0 + throughput: 815.6606851549756 estimated_peak_memory_range: - min: 638976 - max: 1879952 + min: 634880 + max: 1857856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j5mnoekqp + job_id: jg9lz2d8g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:31:40Z' + timestamp: '2024-12-11T22:54:24Z' - torchscript_onnx_tflite: - inference_time: 2418.0 
- throughput: 413.564929693962 + inference_time: 2391.0 + throughput: 418.23504809703053 estimated_peak_memory_range: min: 16384 - max: 11183008 + max: 12071936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jpv6eq2m5 + job_id: j5q6l1mop job_status: Passed torchscript_onnx_qnn: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 1989.0 + throughput: 502.76520864756156 estimated_peak_memory_range: min: 618496 - max: 6698848 + max: 6759184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jgn6o0qm5 + job_id: jp14n167p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:31:41Z' + timestamp: '2024-12-11T22:54:25Z' - torchscript_onnx_tflite: inference_time: 1762.0 throughput: 567.5368898978434 estimated_peak_memory_range: - min: 20480 - max: 5324024 + min: 16384 + max: 75171568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jgjvod38g + job_id: jglvy8lm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1188.0 - throughput: 841.7508417508418 + inference_time: 1176.0 + throughput: 850.3401360544218 estimated_peak_memory_range: - min: 643072 - max: 2209624 + min: 24576 + max: 1512200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jprvo6deg + job_id: jgdxd42zp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:31:42Z' + timestamp: '2024-12-11T22:54:26Z' - torchscript_onnx_tflite: - inference_time: 2661.0 - 
throughput: 375.7985719654265 + inference_time: 2659.0 + throughput: 376.081233546446 estimated_peak_memory_range: min: 16384 - max: 11286816 + max: 15943216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jped8o605 + job_id: j56y8mwyp job_status: Passed torchscript_onnx_qnn: - inference_time: 1982.0 - throughput: 504.54086781029264 + inference_time: 1997.0 + throughput: 500.75112669003505 estimated_peak_memory_range: - min: 618496 - max: 6410192 + min: 622592 + max: 6847280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp2k4xdmp + job_id: j57yen995 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:31:44Z' + timestamp: '2024-12-11T22:54:27Z' - torchscript_onnx_tflite: - inference_time: 1889.0 - throughput: 529.3806246691371 + inference_time: 1901.0 + throughput: 526.0389268805892 estimated_peak_memory_range: min: 16384 - max: 16602464 + max: 16874784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: j5we8wyj5 + job_id: jp3jz76ng job_status: Passed torchscript_onnx_qnn: - inference_time: 1326.0 - throughput: 754.1478129713424 + inference_time: 1335.0 + throughput: 749.0636704119851 estimated_peak_memory_range: min: 618496 - max: 18527792 + max: 16618336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jpy1qz24p + job_id: jp4ly4315 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:31:45Z' + timestamp: '2024-12-11T22:54:28Z' - 
torchscript_onnx_qnn: - inference_time: 1359.0 - throughput: 735.8351729212657 + inference_time: 1364.0 + throughput: 733.1378299120234 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jp4lmn4l5 + job_id: jgz3lynx5 job_status: Passed torchscript_onnx: - inference_time: 2381.0 - throughput: 419.99160016799664 + inference_time: 2406.0 + throughput: 415.6275976724855 estimated_peak_memory_range: - min: 1765376 - max: 1765376 + min: 1810432 + max: 1810432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j5q6z01mp + job_id: jprvldw7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:31:49Z' + timestamp: '2024-12-11T22:54:32Z' diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 4ce263f3..76a84f06 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: SqueezeNet-1_1 universal_assets: - torchscript_onnx_tflite: mmdyr04vm - torchscript_onnx: mmr67gw6m + torchscript_onnx_tflite: mn4l1y00q + torchscript_onnx: mmr3605wm performance_metrics: - torchscript_onnx_tflite: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 16384 - max: 16867672 + min: 36864 + max: 6417360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgn6o0zm5 + job_id: j5mn0kodp job_status: Passed torchscript_onnx_qnn: - inference_time: 713.0 - throughput: 1402.5245441795232 + inference_time: 714.0 + throughput: 1400.5602240896358 
estimated_peak_memory_range: min: 622592 - max: 43997200 + max: 7662160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp3jxnzzg + job_id: j56y8mr6p job_status: Passed torchscript_onnx: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 654.0 + throughput: 1529.051987767584 estimated_peak_memory_range: - min: 569344 - max: 1958544 + min: 12288 + max: 3943192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp4lmnel5 + job_id: j5welz1m5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:31:01Z' + timestamp: '2024-12-11T22:53:45Z' - torchscript_onnx_tflite: - inference_time: 441.0 - throughput: 2267.573696145125 + inference_time: 442.0 + throughput: 2262.443438914027 estimated_peak_memory_range: - min: 16384 - max: 14073776 + min: 12288 + max: 12956656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jprvo6leg + job_id: jgn6zqok5 job_status: Passed torchscript_onnx_qnn: inference_time: 490.0 throughput: 2040.8163265306123 estimated_peak_memory_range: min: 0 - max: 14352592 + max: 10905264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgo2ozldp + job_id: jp3jz7x3g job_status: Passed torchscript_onnx: - inference_time: 478.0 - throughput: 2092.050209205021 + inference_time: 483.0 + throughput: 2070.3933747412007 estimated_peak_memory_range: min: 0 - max: 28024384 + max: 28291968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpxk39095 + job_id: 
jg9lz2x8g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:31:02Z' + timestamp: '2024-12-11T22:53:46Z' - torchscript_onnx_tflite: - inference_time: 436.0 - throughput: 2293.577981651376 + inference_time: 356.0 + throughput: 2808.9887640449438 estimated_peak_memory_range: - min: 12288 - max: 9045552 + min: 48214016 + max: 57232112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp2k4xrmp + job_id: jprvldo0g job_status: Passed torchscript_onnx_qnn: - inference_time: 414.0 - throughput: 2415.458937198068 + inference_time: 398.0 + throughput: 2512.5628140703516 estimated_peak_memory_range: min: 0 - max: 8468608 + max: 9267152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jpv6eqlm5 + job_id: jgo2lwoqp job_status: Passed torchscript_onnx: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 416.0 + throughput: 2403.846153846154 estimated_peak_memory_range: min: 0 - max: 17158160 + max: 16569440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j5mnoe9qp + job_id: jp14n1v7p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:31:03Z' + timestamp: '2024-12-11T22:53:47Z' - torchscript_onnx_tflite: inference_time: 635.0 throughput: 1574.8031496062993 estimated_peak_memory_range: - min: 16384 - max: 23921640 + min: 12288 + max: 6162264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jpy1qzo4p 
+ job_id: jp2krd4rp job_status: Passed torchscript_onnx_qnn: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 643.0 + throughput: 1555.2099533437015 estimated_peak_memory_range: - min: 655360 - max: 1909496 + min: 630784 + max: 1820328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgjvodr8g + job_id: jpv6lm9k5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:30:53Z' + timestamp: '2024-12-11T22:53:36Z' - torchscript_onnx_tflite: - inference_time: 9538.0 - throughput: 104.84378276368211 + inference_time: 9530.0 + throughput: 104.9317943336831 estimated_peak_memory_range: - min: 49152 - max: 10719712 + min: 20480 + max: 13630800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp0zd4me5 + job_id: jpy1o2q8p job_status: Passed torchscript_onnx_qnn: - inference_time: 9738.0 - throughput: 102.69049086054632 + inference_time: 9715.0 + throughput: 102.9336078229542 estimated_peak_memory_range: - min: 552960 - max: 6414352 + min: 569344 + max: 11263696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgz382l65 + job_id: jped7xlo5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:30:55Z' + timestamp: '2024-12-11T22:53:38Z' - torchscript_onnx_tflite: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 642.0 + throughput: 1557.632398753894 estimated_peak_memory_range: - min: 16384 - max: 96130048 + min: 20480 + max: 6393928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 
+295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jp8q62e8p + job_id: jp0zm9d95 job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: min: 638976 - max: 2230752 + max: 2274232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j5we8wlj5 + job_id: jgz3ly4o5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:30:56Z' + timestamp: '2024-12-11T22:53:39Z' - torchscript_onnx_tflite: - inference_time: 1199.0 - throughput: 834.0283569641368 + inference_time: 1193.0 + throughput: 838.2229673093043 estimated_peak_memory_range: - min: 12288 - max: 8155632 + min: 16384 + max: 8283536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jgkeov2og + job_id: jp8qer6kp job_status: Passed torchscript_onnx_qnn: - inference_time: 1267.0 - throughput: 789.2659826361484 + inference_time: 1450.0 + throughput: 689.6551724137931 estimated_peak_memory_range: min: 0 - max: 6042880 + max: 6137616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jg9lk0zvg + job_id: j5welz135 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:30:57Z' + timestamp: '2024-12-11T22:53:40Z' - torchscript_onnx_tflite: - inference_time: 643.0 - throughput: 1555.2099533437015 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 28672 - max: 8428328 + min: 16384 + max: 90671152 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j5q6z0lmp + job_id: jgke20owg job_status: Passed torchscript_onnx_qnn: - inference_time: 648.0 - throughput: 1543.20987654321 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 24576 - max: 1182680 + min: 630784 + max: 1845768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jp1472nlp + job_id: jg9lz2xwg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:30:58Z' + timestamp: '2024-12-11T22:53:42Z' - torchscript_onnx_tflite: inference_time: 1176.0 throughput: 850.3401360544218 estimated_peak_memory_range: - min: 16384 - max: 8082480 + min: 20480 + max: 12747056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jglvo4yl5 + job_id: j5q6l1znp job_status: Passed torchscript_onnx_qnn: - inference_time: 1358.0 - throughput: 736.3770250368188 + inference_time: 1374.0 + throughput: 727.802037845706 estimated_peak_memory_range: - min: 0 - max: 5726320 + min: 622592 + max: 6886832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jgdx8ndlp + job_id: jp14n1v8p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:30:59Z' + timestamp: '2024-12-11T22:53:43Z' - torchscript_onnx_tflite: - inference_time: 821.0 - throughput: 1218.026796589525 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: min: 16384 - max: 12942192 + max: 15202112 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j56yr287p + job_id: jglvy8oj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 877.0 - throughput: 1140.2508551881415 + inference_time: 873.0 + throughput: 1145.475372279496 estimated_peak_memory_range: min: 618496 - max: 16826064 + max: 15982416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j57yk2or5 + job_id: jgdxd4zrp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:31:00Z' + timestamp: '2024-12-11T22:53:44Z' - torchscript_onnx_qnn: - inference_time: 769.0 - throughput: 1300.3901170351105 + inference_time: 763.0 + throughput: 1310.615989515072 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jped8o705 + job_id: jgjvrywvg job_status: Passed torchscript_onnx: - inference_time: 669.0 - throughput: 1494.7683109118086 + inference_time: 699.0 + throughput: 1430.615164520744 estimated_peak_memory_range: - min: 4239360 - max: 4239360 + min: 4304896 + max: 4304896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgn6o01m5 + job_id: jgdxd4zzp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:31:04Z' + timestamp: '2024-12-11T22:53:48Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml index ed6aefdd..3336f11a 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml +++ 
b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1602.07360 research_paper_title: 'SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml index 9d70735e..c48ad7c2 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: SqueezeNet-1_1Quantized universal_assets: - torchscript_onnx_tflite: mqexp8eyn - torchscript_onnx: mnjxkve9q + torchscript_onnx_tflite: mn4l1evwq + torchscript_onnx: mmd3y690n performance_metrics: - torchscript_onnx_tflite: - inference_time: 210.0 - throughput: 4761.9047619047615 + inference_time: 203.0 + throughput: 4926.108374384236 estimated_peak_memory_range: - min: 24576 - max: 9951488 + min: 12288 + max: 28810136 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jpv6e1dz5 + job_id: jgdxdw96p job_status: Passed torchscript_onnx_qnn: - inference_time: 466.0 - throughput: 2145.922746781116 + inference_time: 460.0 + throughput: 2173.913043478261 estimated_peak_memory_range: - min: 28672 - max: 2982200 + min: 172032 + max: 10164304 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgn6o9nj5 + job_id: j56y86lnp job_status: Passed torchscript_onnx: - inference_time: 36352.0 - throughput: 27.508802816901408 + inference_time: 36264.0 + throughput: 27.575557026251932 estimated_peak_memory_range: - min: 10436608 - max: 14250072 + min: 10424320 + max: 14029224 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 26 total_layers: 177 - job_id: jgjvo0r1g + job_id: jgn6z28j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:42:40Z' + timestamp: '2024-12-12T01:11:39Z' - torchscript_onnx_tflite: - inference_time: 151.0 - throughput: 6622.516556291391 + inference_time: 156.0 + throughput: 6410.25641025641 estimated_peak_memory_range: min: 12288 - max: 14219760 + max: 10514848 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jgjvo071g + job_id: jp4lyqo25 job_status: Passed torchscript_onnx_qnn: - inference_time: 346.0 - throughput: 2890.173410404624 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 159744 - max: 10941296 + min: 0 + max: 12069280 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jprvo40kg + job_id: jp3jzk2mg job_status: Passed torchscript_onnx: - inference_time: 29424.0 - throughput: 33.98586188145731 + inference_time: 28600.0 + throughput: 34.96503496503497 estimated_peak_memory_range: - min: 10489856 - max: 1041247728 + min: 10911744 + max: 1043825344 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 177 - job_id: jped8r785 + job_id: jprvlkjkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:42:41Z' + timestamp: '2024-12-12T01:11:41Z' - torchscript_onnx_tflite: - inference_time: 153.0 - throughput: 6535.9477124183 + inference_time: 154.0 + throughput: 6493.506493506494 estimated_peak_memory_range: min: 8192 - max: 9236688 + max: 9472304 
primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jped8rz85 + job_id: j5mn0r27p job_status: Passed torchscript_onnx_qnn: - inference_time: 348.0 - throughput: 2873.5632183908046 + inference_time: 363.0 + throughput: 2754.8209366391184 estimated_peak_memory_range: - min: 28672 - max: 8936544 + min: 159744 + max: 9041616 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp2k47w6p + job_id: jgo2lyq1p job_status: Passed torchscript_onnx: - inference_time: 27570.0 - throughput: 36.27130939426913 + inference_time: 33391.0 + throughput: 29.94818963193675 estimated_peak_memory_range: - min: 11743232 - max: 979621600 + min: 12263424 + max: 980943120 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 177 - job_id: jgz38xl45 + job_id: jp2kr8n6p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:42:43Z' + timestamp: '2024-12-12T01:11:43Z' - torchscript_onnx_tflite: - inference_time: 478.0 - throughput: 2092.050209205021 + inference_time: 500.0 + throughput: 2000.0 estimated_peak_memory_range: min: 12288 - max: 12734384 + max: 17083472 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jgz38xm45 + job_id: jgn6z2yj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 994.0 - throughput: 1006.0362173038229 + inference_time: 998.0 + throughput: 1002.0040080160321 estimated_peak_memory_range: - min: 16384 - max: 7830432 + min: 12288 + max: 6938688 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 71 - job_id: jpy1q4x0p + job_id: jpv6l3xz5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:42:23Z' + timestamp: '2024-12-12T01:11:23Z' - torchscript_onnx_tflite: - inference_time: 4133.0 - throughput: 241.95499637067505 + inference_time: 4064.0 + throughput: 246.06299212598427 estimated_peak_memory_range: - min: 20480 - max: 6973168 + min: 0 + max: 4656200 primary_compute_unit: NPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j5we8d745 + job_id: jprvlkqkg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:42:03Z' + timestamp: '2024-12-12T01:11:02Z' - torchscript_onnx_tflite: - inference_time: 203.0 - throughput: 4926.108374384236 + inference_time: 198.0 + throughput: 5050.50505050505 estimated_peak_memory_range: min: 16384 - max: 2923616 + max: 71105040 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jg9lk3mmg + job_id: jp2kr866p job_status: Passed torchscript_onnx_qnn: - inference_time: 429.0 - throughput: 2331.002331002331 + inference_time: 432.0 + throughput: 2314.814814814815 estimated_peak_memory_range: - min: 184320 - max: 1403104 + min: 180224 + max: 1418032 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp0zd1m05 + job_id: jgjvrx41g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:42:25Z' + timestamp: '2024-12-12T01:11:25Z' - torchscript_onnx_tflite: - inference_time: 1378.0 - 
throughput: 725.6894049346879 + inference_time: 1376.0 + throughput: 726.7441860465116 estimated_peak_memory_range: - min: 16384 - max: 9840496 + min: 0 + max: 13080768 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,14 +324,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jp147djnp + job_id: jpy1oew0p job_status: Passed torchscript_onnx_qnn: - inference_time: 1956.0 - throughput: 511.2474437627812 + inference_time: 2073.0 + throughput: 482.392667631452 estimated_peak_memory_range: - min: 167936 - max: 5890128 + min: 163840 + max: 10833248 primary_compute_unit: NPU precision: int8 layer_info: @@ -339,7 +339,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgkeol2vg + job_id: j5welon45 job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +348,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:42:28Z' + timestamp: '2024-12-12T01:11:28Z' - torchscript_onnx_tflite: - inference_time: 202.0 - throughput: 4950.495049504951 + inference_time: 208.0 + throughput: 4807.692307692308 estimated_peak_memory_range: - min: 0 - max: 29091104 + min: 12288 + max: 3756856 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +362,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jgdx8r36p + job_id: jp0zmy705 job_status: Passed torchscript_onnx_qnn: - inference_time: 432.0 - throughput: 2314.814814814815 + inference_time: 429.0 + throughput: 2331.002331002331 estimated_peak_memory_range: - min: 184320 - max: 1721104 + min: 208896 + max: 1524888 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +377,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j5q6z7lep + job_id: jp14n0xnp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +386,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:42:30Z' + timestamp: 
'2024-12-12T01:11:30Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 505.0 + throughput: 1980.1980198019803 estimated_peak_memory_range: - min: 12288 - max: 9357248 + min: 16384 + max: 8548720 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +400,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j57ykj4n5 + job_id: jp8qeovqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1084.0 - throughput: 922.509225092251 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: min: 0 - max: 6002192 + max: 6002112 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +415,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j56yr38np + job_id: j57yez3n5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +424,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:42:32Z' + timestamp: '2024-12-12T01:11:32Z' - torchscript_onnx_tflite: - inference_time: 204.0 - throughput: 4901.9607843137255 + inference_time: 200.0 + throughput: 5000.0 estimated_peak_memory_range: - min: 16384 - max: 80352352 + min: 0 + max: 9811616 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +438,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jp4lmx125 + job_id: jgke2zmvg job_status: Passed torchscript_onnx_qnn: - inference_time: 431.0 - throughput: 2320.185614849188 + inference_time: 429.0 + throughput: 2331.002331002331 estimated_peak_memory_range: - min: 184320 - max: 1390408 + min: 188416 + max: 1723912 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +453,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp3jx4zmg + job_id: jp4lyq025 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +462,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - 
timestamp: '2024-11-26T03:42:34Z' + timestamp: '2024-12-12T01:11:34Z' - torchscript_onnx_tflite: - inference_time: 459.0 - throughput: 2178.649237472767 + inference_time: 469.0 + throughput: 2132.1961620469083 estimated_peak_memory_range: min: 16384 - max: 8375488 + max: 8688720 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +476,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: jpxk37485 + job_id: j5q6l8oep job_status: Passed torchscript_onnx_qnn: - inference_time: 913.0 - throughput: 1095.290251916758 + inference_time: 937.0 + throughput: 1067.2358591248667 estimated_peak_memory_range: - min: 167936 - max: 5890016 + min: 163840 + max: 6228320 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +491,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jgo2o1l1p + job_id: jpxklv285 job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +500,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:42:36Z' + timestamp: '2024-12-12T01:11:36Z' - torchscript_onnx_tflite: - inference_time: 244.0 - throughput: 4098.360655737705 + inference_time: 239.0 + throughput: 4184.100418410042 estimated_peak_memory_range: - min: 20480 - max: 14272384 + min: 16384 + max: 13654096 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +514,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 43 - job_id: j5mnowm7p + job_id: jglvynr25 job_status: Passed torchscript_onnx_qnn: - inference_time: 520.0 - throughput: 1923.076923076923 + inference_time: 513.0 + throughput: 1949.317738791423 estimated_peak_memory_range: min: 163840 - max: 15085200 + max: 15249008 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +529,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jpv6e1lz5 + job_id: j5mn0ry7p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +538,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:42:38Z' + timestamp: '2024-12-12T01:11:37Z' - torchscript_onnx_qnn: - inference_time: 565.0 - throughput: 1769.9115044247787 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 532480 - max: 532480 + min: 466944 + max: 466944 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,14 +552,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jp8q63eqp + job_id: jped79385 job_status: Passed torchscript_onnx: - inference_time: 44190.0 - throughput: 22.629554197782305 + inference_time: 44174.0 + throughput: 22.637750713089147 estimated_peak_memory_range: - min: 17678336 - max: 17678336 + min: 19640320 + max: 19640320 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +567,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 177 - job_id: j5we8dl45 + job_id: jpy1oe00p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +576,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:42:45Z' + timestamp: '2024-12-12T01:11:45Z' diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index 9570a80b..d6795a6c 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Swin-Base universal_assets: - torchscript_onnx_tflite: mmx71jx2q - torchscript_onnx: mnlvkzxem + torchscript_onnx_tflite: mq8dk953m + torchscript_onnx: mngg1ll5n performance_metrics: - torchscript_onnx_tflite: - inference_time: 26392.0 - throughput: 37.89026977872082 + inference_time: 25933.0 + throughput: 38.560906952531525 estimated_peak_memory_range: - min: 24576 - max: 42463424 + min: 0 + max: 46889696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 
- job_id: j56yr2wvp + job_id: jg9lz28wg job_status: Passed torchscript_onnx_qnn: - inference_time: 26202.0 - throughput: 38.16502557056713 + inference_time: 26203.0 + throughput: 38.16356905697821 estimated_peak_memory_range: - min: 32768 - max: 48649824 + min: 0 + max: 42559712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jgdx8n2kp + job_id: jpy1o2n8p job_status: Passed torchscript_onnx: - inference_time: 43136.0 - throughput: 23.182492581602375 + inference_time: 43110.0 + throughput: 23.19647413593134 estimated_peak_memory_range: - min: 110592 - max: 203826896 + min: 94208 + max: 203977232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: j5q6z0y7p + job_id: jped7x8o5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:27:19Z' + timestamp: '2024-12-11T22:49:34Z' - torchscript_onnx_tflite: - inference_time: 18329.0 - throughput: 54.5583501554913 + inference_time: 18373.0 + throughput: 54.42769281010178 estimated_peak_memory_range: - min: 45056 - max: 211007904 + min: 40960 + max: 212623504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jp3jxn6xg + job_id: jp14n138p job_status: Passed torchscript_onnx_qnn: - inference_time: 17993.0 - throughput: 55.577168899016286 + inference_time: 18277.0 + throughput: 54.713574437818025 estimated_peak_memory_range: - min: 0 - max: 210300272 + min: 78516224 + max: 289515872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: j57yk29q5 + job_id: jp0zm9k95 job_status: Passed torchscript_onnx: - inference_time: 
29943.0 - throughput: 33.396787229068565 + inference_time: 29854.0 + throughput: 33.49634889797012 estimated_peak_memory_range: - min: 733184 - max: 840704992 + min: 0 + max: 837404288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: j56yr27vp + job_id: jgz3ly8o5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:27:20Z' + timestamp: '2024-12-11T22:49:35Z' - torchscript_onnx_tflite: - inference_time: 16682.0 - throughput: 59.944850737321666 + inference_time: 16878.0 + throughput: 59.248726152387725 estimated_peak_memory_range: min: 16384 - max: 212032656 + max: 212118448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jgo2oz84p + job_id: jgdxd40rp job_status: Passed torchscript_onnx_qnn: - inference_time: 16586.0 - throughput: 60.2918123718799 + inference_time: 16556.0 + throughput: 60.40106305870983 estimated_peak_memory_range: min: 0 - max: 218596336 + max: 214709136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp4lmn3q5 + job_id: jp8qer8kp job_status: Passed torchscript_onnx: - inference_time: 27271.0 - throughput: 36.66898903597228 + inference_time: 23639.0 + throughput: 42.3029738990651 estimated_peak_memory_range: - min: 0 - max: 314734688 + min: 634880 + max: 314835152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: jgo2ozr4p + job_id: j5welz835 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: 
'2024-11-26T01:27:21Z' + timestamp: '2024-12-11T22:49:36Z' - torchscript_onnx_tflite: - inference_time: 26291.0 - throughput: 38.03582975162603 + inference_time: 26106.0 + throughput: 38.30537041293189 estimated_peak_memory_range: - min: 36864 - max: 45185736 + min: 20480 + max: 46565312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jpv6eq775 + job_id: j57yen6v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23602.0 - throughput: 42.36929073807305 + inference_time: 23620.0 + throughput: 42.337002540220155 estimated_peak_memory_range: - min: 737280 - max: 1897872 + min: 724992 + max: 1882968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jpxk39xj5 + job_id: jgke20dwg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:27:09Z' + timestamp: '2024-12-11T22:49:25Z' - torchscript_onnx_tflite: - inference_time: 307272.0 - throughput: 3.254445572652243 + inference_time: 307133.0 + throughput: 3.2559184457547707 estimated_peak_memory_range: - min: 20480 - max: 214794928 + min: 65536 + max: 214898640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jgjvodq7g + job_id: jp4ly4885 job_status: Passed torchscript_onnx_qnn: - inference_time: 303060.0 - throughput: 3.2996766316900943 + inference_time: 303132.0 + throughput: 3.2988928915455973 estimated_peak_memory_range: - min: 581632 - max: 6671312 + min: 720896 + max: 11093584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jgn6o0nv5 + job_id: jglvy87j5 job_status: Passed reference_device_info: name: 
SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:27:12Z' + timestamp: '2024-12-11T22:49:28Z' - torchscript_onnx_tflite: - inference_time: 26172.0 - throughput: 38.20877273421978 + inference_time: 26121.0 + throughput: 38.283373530875544 estimated_peak_memory_range: - min: 36864 - max: 46866128 + min: 49152 + max: 42424368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jped8oy75 + job_id: jpxklrm35 job_status: Passed torchscript_onnx_qnn: - inference_time: 23641.0 - throughput: 42.299395118649805 + inference_time: 23826.0 + throughput: 41.97095609837992 estimated_peak_memory_range: - min: 675840 - max: 1880176 + min: 671744 + max: 2121800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jprvo60vg + job_id: j56y8mv6p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:27:13Z' + timestamp: '2024-12-11T22:49:29Z' - torchscript_onnx_tflite: - inference_time: 36807.0 - throughput: 27.168745075664955 + inference_time: 36811.0 + throughput: 27.16579283366385 estimated_peak_memory_range: - min: 0 - max: 200023456 + min: 61440 + max: 200478336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jgz382nz5 + job_id: j5mn0k4dp job_status: Passed torchscript_onnx_qnn: - inference_time: 34696.0 - throughput: 28.821766197832602 + inference_time: 34666.0 + throughput: 28.846708590549817 estimated_peak_memory_range: - min: 720896 - max: 6521408 + min: 712704 + max: 7092528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 1264 - job_id: jp2k4xwxp + job_id: jp3jz783g job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:27:14Z' + timestamp: '2024-12-11T22:49:30Z' - torchscript_onnx_tflite: - inference_time: 26369.0 - throughput: 37.92331904888316 + inference_time: 26312.0 + throughput: 38.005472788081484 estimated_peak_memory_range: - min: 40960 - max: 47623600 + min: 53248 + max: 41983648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j5we8w4z5 + job_id: jgn6zqxk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23763.0 - throughput: 42.08222867483062 + inference_time: 23837.0 + throughput: 41.951587867600786 estimated_peak_memory_range: - min: 692224 - max: 2014256 + min: 753664 + max: 1904856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jpy1qzxrp + job_id: jgo2lwmqp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:27:15Z' + timestamp: '2024-12-11T22:49:31Z' - torchscript_onnx_tflite: - inference_time: 35687.0 - throughput: 28.021408355983972 + inference_time: 35701.0 + throughput: 28.010419876193943 estimated_peak_memory_range: - min: 32768 - max: 215565840 + min: 16384 + max: 215769696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jg9lk0dqg + job_id: jprvld90g job_status: Passed torchscript_onnx_qnn: - inference_time: 33079.0 - throughput: 30.230659935306388 + inference_time: 33118.0 + throughput: 30.195060088169576 estimated_peak_memory_range: - min: 0 - max: 5736672 + min: 663552 + max: 11710752 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp0zd4j25 + job_id: jpv6lmek5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:27:17Z' + timestamp: '2024-12-11T22:49:32Z' - torchscript_onnx_tflite: - inference_time: 33515.0 - throughput: 29.83738624496494 + inference_time: 33229.0 + throughput: 30.09419482981733 estimated_peak_memory_range: - min: 53248 - max: 200941840 + min: 40960 + max: 201506720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jp14726kp + job_id: jp2krdjrp job_status: Passed torchscript_onnx_qnn: - inference_time: 33690.0 - throughput: 29.682398337785692 + inference_time: 33311.0 + throughput: 30.02011347602894 estimated_peak_memory_range: - min: 663552 - max: 198620400 + min: 675840 + max: 203521648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: jp8q62xzp + job_id: jgjvryovg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:27:18Z' + timestamp: '2024-12-11T22:49:33Z' - torchscript_onnx_qnn: - inference_time: 24377.0 - throughput: 41.02227509537679 + inference_time: 24425.0 + throughput: 40.941658137154555 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1264 - job_id: j5mnoe8yp + job_id: j5q6l1wnp job_status: Passed torchscript_onnx: - inference_time: 50182.0 - throughput: 19.927464030927425 + inference_time: 50236.0 + throughput: 19.90604347479895 estimated_peak_memory_range: - min: 183070720 - max: 183070720 + min: 
184172544 + max: 184172544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1150 - job_id: jpv6eqd75 + job_id: jg9lz2kwg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:27:22Z' + timestamp: '2024-12-11T22:49:37Z' diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 954a854b..8a8d861f 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Swin-Small universal_assets: - torchscript_onnx_tflite: mqex9y84n - torchscript_onnx: mng1p2g0n + torchscript_onnx_tflite: mqyv3pp5q + torchscript_onnx: mngg1l95n performance_metrics: - torchscript_onnx_tflite: - inference_time: 19274.0 - throughput: 51.88336619279859 + inference_time: 19442.0 + throughput: 51.43503754757741 estimated_peak_memory_range: - min: 20480 - max: 40453352 + min: 49152 + max: 42199376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jpy1qz7rp + job_id: j56y8m36p job_status: Passed torchscript_onnx_qnn: - inference_time: 19387.0 - throughput: 51.580956310930006 + inference_time: 19271.0 + throughput: 51.89144310103264 estimated_peak_memory_range: - min: 40960 - max: 40820136 + min: 36864 + max: 44709296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jgjvodw7g + job_id: j57yenvv5 job_status: Passed torchscript_onnx: - inference_time: 31280.0 - throughput: 31.9693094629156 + inference_time: 31243.0 + throughput: 32.00716960599174 estimated_peak_memory_range: - min: 110592 - max: 119625384 + min: 65536 + max: 119011024 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jgn6o0kv5 + job_id: j5q6l19np job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:26:22Z' + timestamp: '2024-12-11T22:48:40Z' - torchscript_onnx_tflite: - inference_time: 13075.0 - throughput: 76.48183556405354 + inference_time: 12936.0 + throughput: 77.30364873222015 estimated_peak_memory_range: - min: 16384 - max: 167985088 + min: 20480 + max: 167258512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp0zd4v25 + job_id: jp3jz743g job_status: Passed torchscript_onnx_qnn: - inference_time: 12854.0 - throughput: 77.79679477205539 + inference_time: 12978.0 + throughput: 77.05347511172754 estimated_peak_memory_range: - min: 0 - max: 167692384 + min: 647168 + max: 170585792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jped8ol75 + job_id: jp4ly4j85 job_status: Passed torchscript_onnx: - inference_time: 21000.0 - throughput: 47.61904761904762 + inference_time: 21066.0 + throughput: 47.46985664103294 estimated_peak_memory_range: - min: 667648 - max: 785990720 + min: 90112 + max: 786402944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jprvo6wvg + job_id: jglvy8ej5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:26:23Z' + timestamp: '2024-12-11T22:48:41Z' - torchscript_onnx_tflite: - inference_time: 10193.0 - throughput: 98.10654370646522 + inference_time: 12015.0 + throughput: 
83.22929671244277 estimated_peak_memory_range: min: 16384 - max: 169462064 + max: 169549248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp8q624zp + job_id: jgo2lw1qp job_status: Passed torchscript_onnx_qnn: - inference_time: 11813.0 - throughput: 84.65250148141878 + inference_time: 11825.0 + throughput: 84.56659619450318 estimated_peak_memory_range: - min: 0 - max: 176132080 + min: 106496 + max: 175115696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,7 +181,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jgz3824z5 + job_id: jpxklre35 + job_status: Passed + torchscript_onnx: + inference_time: 16145.0 + throughput: 61.93868070610096 + estimated_peak_memory_range: + min: 0 + max: 298063680 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1145 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1145 + job_id: j56y8mq6p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -190,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:26:24Z' + timestamp: '2024-12-11T22:48:42Z' - torchscript_onnx_tflite: - inference_time: 19323.0 - throughput: 51.751798374993534 + inference_time: 19189.0 + throughput: 52.113189848350615 estimated_peak_memory_range: - min: 40960 - max: 41580496 + min: 49152 + max: 43096384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -204,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jgkeov9yg + job_id: jpv6lmzk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17410.0 - throughput: 57.43825387708214 + inference_time: 17245.0 + throughput: 57.98782255726297 estimated_peak_memory_range: - min: 692224 - max: 1818768 + min: 684032 + max: 1991736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,7 +234,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 1255 - job_id: j5we8w1z5 + job_id: j5mn0kvdp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -228,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:26:13Z' + timestamp: '2024-12-11T22:48:31Z' - torchscript_onnx_tflite: - inference_time: 181559.0 - throughput: 5.5078514422309 + inference_time: 181565.0 + throughput: 5.50766942968083 estimated_peak_memory_range: - min: 135168 - max: 173129088 + min: 131072 + max: 173137584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -242,22 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: j5q6z0m7p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 177676.0 - throughput: 5.628222157184989 - estimated_peak_memory_range: - min: 765952 - max: 7918176 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1255 - job_id: jp1472vkp + job_id: jped7x4o5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +266,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:26:15Z' + timestamp: '2024-12-11T22:48:33Z' - torchscript_onnx_tflite: - inference_time: 19365.0 - throughput: 51.639555899819264 + inference_time: 19212.0 + throughput: 52.05080158234437 estimated_peak_memory_range: - min: 40960 - max: 40759792 + min: 36864 + max: 41470904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jglvo41e5 + job_id: jgz3lyvo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 17659.0 - throughput: 56.62834815108443 + inference_time: 17589.0 + throughput: 56.853715390300756 estimated_peak_memory_range: - min: 651264 - max: 1865504 + min: 675840 + max: 2014616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +295,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jgdx8nzkp + job_id: jp2krd3rp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:26:16Z' + timestamp: '2024-12-11T22:48:34Z' - torchscript_onnx_tflite: - inference_time: 27412.0 - throughput: 36.480373559025246 + inference_time: 27384.0 + throughput: 36.51767455448437 estimated_peak_memory_range: - min: 36864 - max: 161347872 + min: 20480 + max: 161796064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +318,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: j56yr2dvp + job_id: j5welzm35 job_status: Passed torchscript_onnx_qnn: - inference_time: 25482.0 - throughput: 39.24338748920807 + inference_time: 25339.0 + throughput: 39.464856545246455 estimated_peak_memory_range: - min: 692224 - max: 6507888 + min: 679936 + max: 7112288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +333,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: j57yk27q5 + job_id: jpy1o2v8p job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +342,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:26:17Z' + timestamp: '2024-12-11T22:48:36Z' - torchscript_onnx_tflite: - inference_time: 19368.0 - throughput: 51.63155720776539 + inference_time: 19214.0 + throughput: 52.04538357447694 estimated_peak_memory_range: - min: 36864 - max: 39728464 + min: 20480 + max: 38351504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +356,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jp3jxnwxg + job_id: jg9lz29wg job_status: Passed torchscript_onnx_qnn: - inference_time: 17645.0 - throughput: 56.67327854916407 + inference_time: 17525.0 + throughput: 57.06134094151213 estimated_peak_memory_range: - min: 671744 - max: 1816008 + min: 
663552 + max: 2260112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +371,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jp4lmn9q5 + job_id: jp0zm9e95 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +380,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:26:19Z' + timestamp: '2024-12-11T22:48:37Z' - torchscript_onnx_tflite: - inference_time: 26082.0 - throughput: 38.34061805076298 + inference_time: 26072.0 + throughput: 38.35532371893219 estimated_peak_memory_range: - min: 57344 - max: 173292208 + min: 40960 + max: 173244048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +394,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jgo2oz44p + job_id: jp14n1q8p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23989.0 + throughput: 41.68577264579599 + estimated_peak_memory_range: + min: 929792 + max: 7095040 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1255 + job_id: jp8qerwkp job_status: Passed reference_device_info: name: SA8775P ADP @@ -403,13 +418,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:26:20Z' + timestamp: '2024-12-11T22:48:38Z' - torchscript_onnx_tflite: - inference_time: 24588.0 - throughput: 40.67024564828372 + inference_time: 24594.0 + throughput: 40.6603236561763 estimated_peak_memory_range: - min: 16384 - max: 162031088 + min: 40960 + max: 162895664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +432,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jpv6eq975 + job_id: jgdxd47rp job_status: Passed torchscript_onnx_qnn: - inference_time: 24434.0 - throughput: 40.92657771957109 + inference_time: 24497.0 + throughput: 40.82132506021146 estimated_peak_memory_range: - min: 659456 - max: 
164586176 + min: 626688 + max: 164809920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +447,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: j5mnoedyp + job_id: jgke20rwg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,13 +456,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:26:21Z' - - torchscript_onnx: - inference_time: 37922.0 - throughput: 26.369917198459998 + timestamp: '2024-12-11T22:48:39Z' + - torchscript_onnx_qnn: + inference_time: 18438.0 + throughput: 54.23581733376722 + estimated_peak_memory_range: + min: 602112 + max: 602112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 1255 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 1255 + job_id: jgn6zqrk5 + job_status: Passed + torchscript_onnx: + inference_time: 37854.0 + throughput: 26.41728747292228 estimated_peak_memory_range: - min: 105349120 - max: 105349120 + min: 106242048 + max: 106242048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -455,7 +485,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1145 - job_id: jpy1qzmrp + job_id: jp3jz7q3g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -464,4 +494,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:26:25Z' + timestamp: '2024-12-11T22:48:43Z' diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index f9783e7e..d0c3628d 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Swin-Tiny universal_assets: - torchscript_onnx_tflite: mq9pdyxyn - torchscript_onnx: mn7lp568q + torchscript_onnx_tflite: mnj4xy2kn + torchscript_onnx: mq8dk93zm performance_metrics: - torchscript_onnx_tflite: - inference_time: 11833.0 - throughput: 84.50942280064227 + 
inference_time: 11898.0 + throughput: 84.04773911581779 estimated_peak_memory_range: - min: 45056 - max: 24356576 + min: 49152 + max: 28228344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jp4lmn8q5 + job_id: jp8qer7xp job_status: Passed torchscript_onnx_qnn: - inference_time: 12065.0 - throughput: 82.88437629506838 + inference_time: 12056.0 + throughput: 82.94625082946251 estimated_peak_memory_range: - min: 49152 - max: 29215440 + min: 622592 + max: 25634616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j5q6z0z7p + job_id: jgz3ly7k5 job_status: Passed torchscript_onnx: - inference_time: 17445.0 - throughput: 57.32301519059902 + inference_time: 17581.0 + throughput: 56.87958591661453 estimated_peak_memory_range: - min: 45056 - max: 69645936 + min: 32768 + max: 69161512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jp14727kp + job_id: j5mn0kwdp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:25:30Z' + timestamp: '2024-12-11T22:47:48Z' - torchscript_onnx_tflite: - inference_time: 8179.0 - throughput: 122.26433549333659 + inference_time: 8160.0 + throughput: 122.54901960784314 estimated_peak_memory_range: - min: 20480 - max: 115947616 + min: 32768 + max: 115557376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jpxk39mj5 + job_id: jgke20y2g job_status: Passed torchscript_onnx_qnn: - inference_time: 8131.0 - throughput: 122.98610257040954 + inference_time: 8328.0 + throughput: 120.07684918347742 estimated_peak_memory_range: - min: 618496 - max: 
113926848 + min: 634880 + max: 113955632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jglvo4oe5 + job_id: j5welzd65 job_status: Passed torchscript_onnx: - inference_time: 11802.0 - throughput: 84.7314014573801 + inference_time: 11924.0 + throughput: 83.86447500838645 estimated_peak_memory_range: min: 0 - max: 464466912 + max: 463696096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: jgdx8n8kp + job_id: jgn6zq9k5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:25:31Z' + timestamp: '2024-12-11T22:47:49Z' - torchscript_onnx_tflite: - inference_time: 6161.0 - throughput: 162.31131309852296 + inference_time: 6185.0 + throughput: 161.68148746968473 estimated_peak_memory_range: - min: 16384 - max: 111732464 + min: 12288 + max: 111128512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: j5mnoe4yp + job_id: j5q6l124p job_status: Passed torchscript_onnx_qnn: - inference_time: 7398.0 - throughput: 135.17166801838334 + inference_time: 7302.0 + throughput: 136.9487811558477 estimated_peak_memory_range: - min: 638976 - max: 112219696 + min: 684032 + max: 111725664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j56yr2rvp + job_id: jg9lz23lg job_status: Passed torchscript_onnx: - inference_time: 10571.0 - throughput: 94.59842966606755 + inference_time: 10541.0 + throughput: 94.86765961483731 estimated_peak_memory_range: - min: 20480 - max: 199062192 + min: 0 + max: 199440448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 623 - job_id: j57yk2kq5 + job_id: jprvld40g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:25:32Z' + timestamp: '2024-12-11T22:47:50Z' - torchscript_onnx_tflite: - inference_time: 11899.0 - throughput: 84.04067568703252 + inference_time: 11841.0 + throughput: 84.45232666159953 estimated_peak_memory_range: - min: 16384 - max: 25586000 + min: 28672 + max: 24442824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jgn6o0ov5 + job_id: jglvy8k85 job_status: Passed torchscript_onnx_qnn: - inference_time: 10546.0 - throughput: 94.82268158543523 + inference_time: 10550.0 + throughput: 94.7867298578199 estimated_peak_memory_range: - min: 671744 - max: 1968560 + min: 630784 + max: 2039104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jp3jxnxxg + job_id: jp14n1d2p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:25:21Z' + timestamp: '2024-12-11T22:47:38Z' - torchscript_onnx_tflite: - inference_time: 96559.0 - throughput: 10.356362431259644 + inference_time: 96608.0 + throughput: 10.351109638953297 estimated_peak_memory_range: - min: 16384 - max: 112090256 + min: 40960 + max: 111486288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jprvo6ovg + job_id: j56y8m10p job_status: Passed torchscript_onnx_qnn: - inference_time: 94322.0 - throughput: 10.60198044994805 + inference_time: 94285.0 + throughput: 10.6061409556133 estimated_peak_memory_range: - min: 
593920 - max: 6463008 + min: 692224 + max: 11096688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jpv6eqe75 + job_id: j5welzd35 job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:25:24Z' + timestamp: '2024-12-11T22:47:41Z' - torchscript_onnx_tflite: - inference_time: 11909.0 - throughput: 83.97010664203543 + inference_time: 11868.0 + throughput: 84.26019548365352 estimated_peak_memory_range: - min: 73728 - max: 24864864 + min: 40960 + max: 26495152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jp2k4x4xp + job_id: jp3jz7mlg job_status: Passed torchscript_onnx_qnn: - inference_time: 10608.0 - throughput: 94.2684766214178 + inference_time: 10590.0 + throughput: 94.42870632672333 estimated_peak_memory_range: - min: 692224 - max: 1893552 + min: 24576 + max: 1195504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jgjvodo7g + job_id: jg9lz23wg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:25:25Z' + timestamp: '2024-12-11T22:47:42Z' - torchscript_onnx_tflite: - inference_time: 17323.0 - throughput: 57.72672169947469 + inference_time: 17299.0 + throughput: 57.80680964217585 estimated_peak_memory_range: - min: 40960 - max: 104032912 + min: 36864 + max: 104389680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jpy1qzqrp + job_id: jgo2lwvxp job_status: Passed torchscript_onnx_qnn: - inference_time: 15424.0 - throughput: 
64.83402489626556 + inference_time: 15586.0 + throughput: 64.16014371872193 estimated_peak_memory_range: - min: 663552 - max: 6310816 + min: 3022848 + max: 9259280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jped8o875 + job_id: jp14n1d8p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:25:26Z' + timestamp: '2024-12-11T22:47:43Z' - torchscript_onnx_tflite: - inference_time: 11905.0 - throughput: 83.99832003359933 + inference_time: 11918.0 + throughput: 83.90669575432119 estimated_peak_memory_range: - min: 49152 - max: 22833760 + min: 16384 + max: 24232112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jp0zd4d25 + job_id: jpv6lmwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 10664.0 - throughput: 93.7734433608402 + inference_time: 10670.0 + throughput: 93.72071227741331 estimated_peak_memory_range: - min: 704512 - max: 1893232 + min: 684032 + max: 1950176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jgz3828z5 + job_id: jgdxd4rrp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:25:27Z' + timestamp: '2024-12-11T22:47:45Z' - torchscript_onnx_tflite: - inference_time: 16057.0 - throughput: 62.27813414710095 + inference_time: 16007.0 + throughput: 62.47266820765915 estimated_peak_memory_range: - min: 20480 - max: 108323472 + min: 49152 + max: 113834064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jp8q626zp + 
job_id: jgjvrylxg job_status: Passed torchscript_onnx_qnn: - inference_time: 14563.0 - throughput: 68.66717022591499 + inference_time: 14644.0 + throughput: 68.28735318219066 estimated_peak_memory_range: - min: 0 - max: 5860752 + min: 622592 + max: 6953552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: j5we8w8z5 + job_id: jp4ly4x85 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:25:28Z' + timestamp: '2024-12-11T22:47:46Z' - torchscript_onnx_tflite: - inference_time: 15500.0 - throughput: 64.51612903225806 + inference_time: 15488.0 + throughput: 64.56611570247934 estimated_peak_memory_range: - min: 24576 - max: 107943632 + min: 0 + max: 110737472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jgkeovoyg + job_id: jped7xv15 job_status: Passed torchscript_onnx_qnn: - inference_time: 15162.0 - throughput: 65.95435958316845 + inference_time: 15274.0 + throughput: 65.470734581642 estimated_peak_memory_range: - min: 634880 - max: 110936992 + min: 622592 + max: 116349712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jg9lk0kqg + job_id: jpxklr735 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:25:29Z' + timestamp: '2024-12-11T22:47:47Z' - torchscript_onnx_qnn: - inference_time: 11229.0 - throughput: 89.0551251224508 + inference_time: 11242.0 + throughput: 88.95214374666429 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 709 - job_id: jgo2ozo4p + 
job_id: jgdxd4rep + job_status: Passed + torchscript_onnx: + inference_time: 20991.0 + throughput: 47.63946453241866 + estimated_peak_memory_range: + min: 61693952 + max: 61693952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 623 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 623 + job_id: jp2krd7rp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:25:33Z' + timestamp: '2024-12-11T22:47:51Z' diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py index 2b7a369b..220d61cb 100644 --- a/qai_hub_models/models/trocr/export.py +++ b/qai_hub_models/models/trocr/export.py @@ -31,7 +31,7 @@ export_without_hub_access, ) -ALL_COMPONENTS = ["TrOCREncoder", "TrOCRDecoder"] +ALL_COMPONENTS = ["TrOCRDecoder", "TrOCREncoder"] def export_model( @@ -123,10 +123,10 @@ def export_model( # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) components_dict: dict[str, BaseModel] = {} - if "TrOCREncoder" in components: - components_dict["TrOCREncoder"] = model.encoder # type: ignore if "TrOCRDecoder" in components: components_dict["TrOCRDecoder"] = model.decoder # type: ignore + if "TrOCREncoder" in components: + components_dict["TrOCREncoder"] = model.encoder # type: ignore compile_jobs: dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml index 64581af2..53dc4158 100644 --- a/qai_hub_models/models/trocr/perf.yaml +++ b/qai_hub_models/models/trocr/perf.yaml @@ -42,55 +42,55 @@ aggregated: - SA8295P - SA8650P Proxy models: -- name: TrOCREncoder +- name: TrOCRDecoder universal_assets: - torchscript_onnx_tflite: mng1p77rn - torchscript_onnx: mnjxw98yq + torchscript_onnx_tflite: mq8dk92zm + torchscript_onnx: mmd3yzg3n performance_metrics: - torchscript_onnx_tflite: - inference_time: 50056.0 - throughput: 19.977625059932876 + inference_time: 2203.0 + throughput: 453.92646391284615 estimated_peak_memory_range: - min: 7159808 - max: 32900144 + min: 16384 + max: 150250880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jglvo4025 + total_layers: 399 + job_id: jg9lz2ylg job_status: Passed torchscript_onnx_qnn: - inference_time: 52421.0 - throughput: 19.076324373819652 + inference_time: 2367.0 + throughput: 422.4757076468103 estimated_peak_memory_range: - min: 1871872 - max: 23982792 + min: 2523136 + max: 372339872 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jglvo4e25 + total_layers: 375 + job_id: 
jgjvry2xg job_status: Passed torchscript_onnx: - inference_time: 38114.0 - throughput: 26.23707823896731 + inference_time: 2762.0 + throughput: 362.0564808110065 estimated_peak_memory_range: - min: 16384 - max: 58740784 + min: 856064 + max: 3257600 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 396 + layers_on_npu: 395 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 396 - job_id: jgkeovdvg + total_layers: 395 + job_id: jgo2lw3xp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,51 +99,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:24:32Z' + timestamp: '2024-12-11T22:46:49Z' - torchscript_onnx_tflite: - inference_time: 39560.0 - throughput: 25.278058645096056 + inference_time: 1561.0 + throughput: 640.6149903907751 estimated_peak_memory_range: - min: 31793152 - max: 93248752 + min: 12288 + max: 50720384 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jp3jxn4mg + total_layers: 399 + job_id: jgdxd4qep job_status: Passed torchscript_onnx_qnn: - inference_time: 41432.0 - throughput: 24.135933577910794 + inference_time: 1729.0 + throughput: 578.368999421631 estimated_peak_memory_range: - min: 1855488 - max: 63331984 + min: 0 + max: 52979680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jp3jxnqmg + total_layers: 375 + job_id: jgz3lyjk5 job_status: Passed torchscript_onnx: - inference_time: 31563.0 - throughput: 31.682666413205336 + inference_time: 2100.0 + throughput: 476.1904761904762 estimated_peak_memory_range: min: 0 - max: 270283344 + max: 182149456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 396 + layers_on_npu: 395 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 396 - job_id: jglvo4725 + total_layers: 
395 + job_id: jgjvryexg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,51 +152,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:24:34Z' + timestamp: '2024-12-11T22:46:51Z' - torchscript_onnx_tflite: - inference_time: 35481.0 - throughput: 28.184098531608466 + inference_time: 1451.0 + throughput: 689.1798759476223 estimated_peak_memory_range: - min: 7147520 - max: 72752960 + min: 8192 + max: 47800832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jpv6eq1z5 + total_layers: 399 + job_id: jp4ly4dv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 37937.0 - throughput: 26.359490734639007 + inference_time: 1545.0 + throughput: 647.2491909385113 estimated_peak_memory_range: - min: 1806336 - max: 68835744 + min: 0 + max: 47971344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jpv6eqzz5 + total_layers: 375 + job_id: jg9lz2wlg job_status: Passed torchscript_onnx: - inference_time: 25867.0 - throughput: 38.65929562763367 + inference_time: 1880.0 + throughput: 531.9148936170212 estimated_peak_memory_range: - min: 9142272 - max: 135478960 + min: 0 + max: 138747440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 396 + layers_on_npu: 395 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 396 - job_id: jp3jxn8mg + total_layers: 395 + job_id: jgz3lyrk5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,36 +205,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:24:35Z' + timestamp: '2024-12-11T22:46:52Z' - torchscript_onnx_tflite: - inference_time: 50017.0 - throughput: 19.993202311214187 + inference_time: 2205.0 + throughput: 453.51473922902494 
estimated_peak_memory_range: - min: 9773056 - max: 34822600 + min: 16384 + max: 381294304 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jped8or85 + total_layers: 399 + job_id: j5mn0k6wp job_status: Passed torchscript_onnx_qnn: - inference_time: 36736.0 - throughput: 27.221254355400696 + inference_time: 2250.0 + throughput: 444.44444444444446 estimated_peak_memory_range: - min: 1892352 - max: 3203960 + min: 69632 + max: 1432640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jped8o485 + total_layers: 375 + job_id: jgdxd4oep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,36 +243,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:24:16Z' + timestamp: '2024-12-11T22:46:33Z' - torchscript_onnx_tflite: - inference_time: 266256.0 - throughput: 3.7557839072171144 + inference_time: 12302.0 + throughput: 81.28759551292472 estimated_peak_memory_range: - min: 7225344 - max: 72174704 + min: 262144 + max: 46561248 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: j5we8wd45 + total_layers: 399 + job_id: jprvld29g job_status: Passed torchscript_onnx_qnn: - inference_time: 248746.0 - throughput: 4.0201651483842955 + inference_time: 12414.0 + throughput: 80.55421298533913 estimated_peak_memory_range: - min: 1994752 - max: 9174208 + min: 7311360 + max: 17588240 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jp1472qnp + total_layers: 375 + job_id: j5mn0k3wp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,36 +281,36 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:24:20Z' + timestamp: '2024-12-11T22:46:37Z' - torchscript_onnx_tflite: - inference_time: 50949.0 - throughput: 19.627470607862765 + inference_time: 2210.0 + throughput: 452.4886877828054 estimated_peak_memory_range: - min: 7139328 - max: 32426544 + min: 20480 + max: 91630472 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jp1472dnp + total_layers: 399 + job_id: jpy1o2j7p job_status: Passed torchscript_onnx_qnn: - inference_time: 37437.0 - throughput: 26.71154205732297 + inference_time: 2316.0 + throughput: 431.77892918825563 estimated_peak_memory_range: - min: 1912832 - max: 10214936 + min: 679936 + max: 2555664 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: j57yk2vn5 + total_layers: 375 + job_id: jprvlde9g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,36 +319,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:24:22Z' + timestamp: '2024-12-11T22:46:39Z' - torchscript_onnx_tflite: - inference_time: 65315.0 - throughput: 15.310418739952537 + inference_time: 3067.0 + throughput: 326.0515161395501 estimated_peak_memory_range: - min: 7155712 - max: 69891760 + min: 32768 + max: 43869504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: j57yk2jn5 + total_layers: 399 + job_id: jp8qermxp job_status: Passed torchscript_onnx_qnn: - inference_time: 50613.0 - throughput: 19.757769742951417 + inference_time: 3740.0 + throughput: 267.379679144385 estimated_peak_memory_range: - min: 1916928 - max: 7939776 + min: 7409664 + max: 13511280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 
443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jpxk39e85 + total_layers: 375 + job_id: jpy1o267p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,36 +357,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:24:24Z' + timestamp: '2024-12-11T22:46:41Z' - torchscript_onnx_tflite: - inference_time: 50607.0 - throughput: 19.760112237437507 + inference_time: 2270.0 + throughput: 440.52863436123346 estimated_peak_memory_range: - min: 7155712 - max: 35959696 + min: 16384 + max: 362671992 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jpxk39785 + total_layers: 399 + job_id: j5q6l1r4p job_status: Passed torchscript_onnx_qnn: - inference_time: 37746.0 - throughput: 26.492873417050813 + inference_time: 2350.0 + throughput: 425.531914893617 estimated_peak_memory_range: - min: 1929216 - max: 3273744 + min: 2510848 + max: 3837072 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jgn6o0rj5 + total_layers: 375 + job_id: jp8qerzxp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,36 +395,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:24:26Z' + timestamp: '2024-12-11T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 59808.0 - throughput: 16.720171214553236 + inference_time: 3341.0 + throughput: 299.311583358276 estimated_peak_memory_range: - min: 6238208 - max: 70707296 + min: 16384 + max: 47075280 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jprvo64kg + total_layers: 399 + job_id: j56y8mz0p job_status: Passed torchscript_onnx_qnn: - inference_time: 42298.0 - 
throughput: 23.64177975317982 + inference_time: 3578.0 + throughput: 279.4857462269424 estimated_peak_memory_range: - min: 0 - max: 5736256 + min: 7446528 + max: 13347520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jp2k4x36p + total_layers: 375 + job_id: j5q6l134p job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,36 +433,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:24:28Z' + timestamp: '2024-12-11T22:46:45Z' - torchscript_onnx_tflite: - inference_time: 60447.0 - throughput: 16.543418201068704 + inference_time: 2671.0 + throughput: 374.3916136278547 estimated_peak_memory_range: - min: 7192576 - max: 73428160 + min: 16384 + max: 50048912 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 591 + layers_on_npu: 399 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 591 - job_id: jp0zd4e05 + total_layers: 399 + job_id: jgo2lwnxp job_status: Passed torchscript_onnx_qnn: - inference_time: 63930.0 - throughput: 15.64210855623338 + inference_time: 2740.0 + throughput: 364.963503649635 estimated_peak_memory_range: - min: 0 - max: 63736160 + min: 4399104 + max: 58547728 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: jp0zd4k05 + total_layers: 375 + job_id: j56y8mn0p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,36 +471,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:24:30Z' + timestamp: '2024-12-11T22:46:47Z' - torchscript_onnx_qnn: - inference_time: 34007.0 - throughput: 29.40571058899638 + inference_time: 2444.0 + throughput: 409.16530278232403 estimated_peak_memory_range: - min: 1773568 - max: 1773568 + min: 7393280 + max: 7393280 primary_compute_unit: NPU precision: fp16 
layer_info: - layers_on_npu: 443 + layers_on_npu: 375 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 443 - job_id: j5we8wm45 + total_layers: 375 + job_id: jp4ly4vv5 job_status: Passed torchscript_onnx: - inference_time: 36795.0 - throughput: 27.177605652941974 + inference_time: 2741.0 + throughput: 364.8303538854433 estimated_peak_memory_range: - min: 51490816 - max: 51490816 + min: 72687616 + max: 72687616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 396 + layers_on_npu: 395 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 396 - job_id: jpv6eq4z5 + total_layers: 395 + job_id: jg9lz24lg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,56 +509,56 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:24:37Z' -- name: TrOCRDecoder + timestamp: '2024-12-11T22:46:54Z' +- name: TrOCREncoder universal_assets: - torchscript_onnx_tflite: mn1zy444m - torchscript_onnx: mm6v86j5q + torchscript_onnx_tflite: mngg1l40n + torchscript_onnx: mm5ed7j9m performance_metrics: - torchscript_onnx_tflite: - inference_time: 2210.0 - throughput: 452.4886877828054 + inference_time: 50015.0 + throughput: 19.99400179946016 estimated_peak_memory_range: - min: 16384 - max: 339146776 + min: 7028736 + max: 35427192 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: j56yr23np + total_layers: 591 + job_id: jp14n1w2p job_status: Passed torchscript_onnx_qnn: - inference_time: 2348.0 - throughput: 425.89437819420783 + inference_time: 53281.0 + throughput: 18.76841650869916 estimated_peak_memory_range: - min: 69632 - max: 345956904 + min: 1785856 + max: 23398000 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: j56yr2qnp + total_layers: 443 + job_id: jped7xw15 job_status: 
Passed torchscript_onnx: - inference_time: 2799.0 - throughput: 357.27045373347624 + inference_time: 38056.0 + throughput: 26.27706537733866 estimated_peak_memory_range: - min: 16384 - max: 78206704 + min: 94208 + max: 59331232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 395 + layers_on_npu: 396 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 395 - job_id: j5q6z0wep + total_layers: 396 + job_id: jpv6lmvj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -567,51 +567,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:24:32Z' + timestamp: '2024-12-11T22:46:49Z' - torchscript_onnx_tflite: - inference_time: 1694.0 - throughput: 590.318772136954 + inference_time: 39322.0 + throughput: 25.43105640608311 estimated_peak_memory_range: - min: 8036352 - max: 59820160 + min: 5394432 + max: 69829648 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jgo2oz11p + total_layers: 591 + job_id: j57yenll5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1750.0 - throughput: 571.4285714285714 + inference_time: 41406.0 + throughput: 24.15108921412356 estimated_peak_memory_range: - min: 0 - max: 52814208 + min: 1835008 + max: 64399312 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jgo2oze1p + total_layers: 443 + job_id: j5welzq65 job_status: Passed torchscript_onnx: - inference_time: 2164.0 - throughput: 462.1072088724584 + inference_time: 31095.0 + throughput: 32.15951117543013 estimated_peak_memory_range: min: 0 - max: 181907840 + max: 271864016 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 395 + layers_on_npu: 396 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 395 - job_id: j56yr2vnp + total_layers: 396 + job_id: jped7xk15 
job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -620,51 +620,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:24:34Z' + timestamp: '2024-12-11T22:46:51Z' - torchscript_onnx_tflite: - inference_time: 1573.0 - throughput: 635.7279084551811 + inference_time: 36222.0 + throughput: 27.607531334548064 estimated_peak_memory_range: - min: 12288 - max: 46223648 + min: 5459968 + max: 71729248 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jgjvod01g + total_layers: 591 + job_id: jpxklr615 job_status: Passed torchscript_onnx_qnn: - inference_time: 1543.0 - throughput: 648.0881399870383 + inference_time: 33821.0 + throughput: 29.56742852074155 estimated_peak_memory_range: - min: 0 - max: 46980048 + min: 1814528 + max: 68859456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jgjvodk1g + total_layers: 443 + job_id: jp14n1e2p job_status: Passed torchscript_onnx: - inference_time: 2079.0 - throughput: 481.000481000481 + inference_time: 24497.0 + throughput: 40.82132506021146 estimated_peak_memory_range: - min: 0 - max: 138010608 + min: 17055744 + max: 144422448 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 395 + layers_on_npu: 396 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 395 - job_id: jgo2ozm1p + total_layers: 396 + job_id: j5welz965 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -673,36 +673,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:24:36Z' + timestamp: '2024-12-11T22:46:53Z' - torchscript_onnx_tflite: - inference_time: 2195.0 - throughput: 455.58086560364467 + inference_time: 49833.0 + throughput: 20.067023859691368 estimated_peak_memory_range: - 
min: 16384 - max: 378955912 + min: 7118848 + max: 33627456 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jgz382x45 + total_layers: 591 + job_id: jgn6zqmr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2249.0 - throughput: 444.642063139173 + inference_time: 36818.0 + throughput: 27.16062795371829 estimated_peak_memory_range: - min: 1314816 - max: 2688784 + min: 1875968 + max: 8263936 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jgz382v45 + total_layers: 443 + job_id: j57yenxl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -711,36 +711,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:24:16Z' + timestamp: '2024-12-11T22:46:33Z' - torchscript_onnx_tflite: - inference_time: 12213.0 - throughput: 81.87996397281586 + inference_time: 266530.0 + throughput: 3.7519228604659887 estimated_peak_memory_range: - min: 262144 - max: 45584624 + min: 7213056 + max: 72360640 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jg9lk03mg + total_layers: 591 + job_id: jp2krd94p job_status: Passed torchscript_onnx_qnn: - inference_time: 12403.0 - throughput: 80.62565508344755 + inference_time: 247644.0 + throughput: 4.0380546268029915 estimated_peak_memory_range: - min: 7376896 - max: 13051472 + min: 1888256 + max: 12262368 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jgdx8n76p + total_layers: 443 + job_id: jgn6zq3r5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -749,36 +749,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: 
SA7255P - timestamp: '2024-11-26T01:24:20Z' + timestamp: '2024-12-11T22:46:38Z' - torchscript_onnx_tflite: - inference_time: 2203.0 - throughput: 453.92646391284615 + inference_time: 50253.0 + throughput: 19.89930949396056 estimated_peak_memory_range: - min: 16384 - max: 299260304 + min: 7122944 + max: 31626096 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jgdx8nr6p + total_layers: 591 + job_id: jp0zm9265 job_status: Passed torchscript_onnx_qnn: - inference_time: 2373.0 - throughput: 421.40750105351873 + inference_time: 37723.0 + throughput: 26.50902632346314 estimated_peak_memory_range: - min: 1925120 - max: 3362936 + min: 1990656 + max: 3671032 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jp4lmnj25 + total_layers: 443 + job_id: jp2krdl4p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -787,36 +787,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:24:22Z' - - torchscript_onnx_tflite: - inference_time: 3062.0 - throughput: 326.5839320705421 - estimated_peak_memory_range: - min: 32768 - max: 42156448 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 399 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 399 - job_id: jp4lmnx25 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3494.0 - throughput: 286.20492272467084 + timestamp: '2024-12-11T22:46:40Z' + - torchscript_onnx_qnn: + inference_time: 50866.0 + throughput: 19.659497503243816 estimated_peak_memory_range: - min: 7413760 - max: 13158544 + min: 4259840 + max: 10664704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: j5mnoev7p + total_layers: 443 + job_id: 
jp0zm9l65 job_status: Passed reference_device_info: name: SA8295P ADP @@ -825,36 +810,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:24:24Z' + timestamp: '2024-12-11T22:46:41Z' - torchscript_onnx_tflite: - inference_time: 2193.0 - throughput: 455.99635202918375 + inference_time: 50307.0 + throughput: 19.877949390740852 estimated_peak_memory_range: - min: 32768 - max: 341058328 + min: 7143424 + max: 35617920 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: j5mnoew7p + total_layers: 591 + job_id: jglvy8285 job_status: Passed torchscript_onnx_qnn: - inference_time: 2268.0 - throughput: 440.9171075837742 + inference_time: 37010.0 + throughput: 27.01972439881113 estimated_peak_memory_range: - min: 1454080 - max: 2901872 + min: 1912832 + max: 3648264 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jprvo61kg + total_layers: 443 + job_id: jgke2032g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -863,36 +848,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:24:26Z' + timestamp: '2024-12-11T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 3410.0 - throughput: 293.2551319648094 + inference_time: 59803.0 + throughput: 16.721569152049227 estimated_peak_memory_range: - min: 36864 - max: 45268960 + min: 7163904 + max: 72570832 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jp2k4x76p + total_layers: 591 + job_id: jp3jz71lg job_status: Passed torchscript_onnx_qnn: - inference_time: 3581.0 - throughput: 279.2516056967328 + inference_time: 42412.0 + throughput: 23.578232575686126 estimated_peak_memory_range: - min: 7430144 
- max: 13157536 + min: 1867776 + max: 8045552 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jpy1qzv0p + total_layers: 443 + job_id: jglvy8385 job_status: Passed reference_device_info: name: SA8775P ADP @@ -901,36 +886,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:24:28Z' + timestamp: '2024-12-11T22:46:45Z' - torchscript_onnx_tflite: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 60304.0 + throughput: 16.582647917219422 estimated_peak_memory_range: - min: 32768 - max: 49550976 + min: 7192576 + max: 71842864 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 399 + layers_on_npu: 591 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 399 - job_id: jp8q62wqp + total_layers: 591 + job_id: jpv6lmrj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2863.0 - throughput: 349.28396786587496 + inference_time: 63000.0 + throughput: 15.873015873015873 estimated_peak_memory_range: - min: 6230016 - max: 56878480 + min: 0 + max: 67100800 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jp8q628qp + total_layers: 443 + job_id: jp3jz7elg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -939,36 +924,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:24:30Z' + timestamp: '2024-12-11T22:46:47Z' - torchscript_onnx_qnn: - inference_time: 2405.0 - throughput: 415.8004158004158 + inference_time: 34029.0 + throughput: 29.386699579770195 estimated_peak_memory_range: - min: 7405568 - max: 7405568 + min: 1773568 + max: 1773568 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 375 + layers_on_npu: 443 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 375 - job_id: jg9lk09mg + 
total_layers: 443 + job_id: jpxklry15 job_status: Passed torchscript_onnx: - inference_time: 2740.0 - throughput: 364.963503649635 + inference_time: 36913.0 + throughput: 27.09072684420123 estimated_peak_memory_range: - min: 71192576 - max: 71192576 + min: 51458048 + max: 51458048 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 395 + layers_on_npu: 396 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 395 - job_id: jgjvod11g + total_layers: 396 + job_id: jp14n182p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -977,4 +962,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:24:38Z' + timestamp: '2024-12-11T22:46:54Z' diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 2dc544c5..57d2d125 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Unet-Segmentation universal_assets: - torchscript_onnx_tflite: mq9pdv62n - torchscript_onnx: mq9pdv12n + torchscript_onnx_tflite: mnw8e983n + torchscript_onnx: mnl6v2v3n performance_metrics: - torchscript_onnx_tflite: - inference_time: 152941.0 - throughput: 6.538469082848942 + inference_time: 151304.0 + throughput: 6.609210595886427 estimated_peak_memory_range: - min: 6594560 - max: 488175696 + min: 6598656 + max: 491345928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jpxk39y85 + job_id: jgz3ly265 job_status: Passed torchscript_onnx_qnn: - inference_time: 154481.0 - throughput: 6.47328797716224 + inference_time: 151054.0 + throughput: 6.6201490857574115 estimated_peak_memory_range: - min: 9871360 - max: 38973784 + min: 9969664 + max: 35291808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 52 - job_id: jglvo4k25 + job_id: jp4ly4kv5 job_status: Passed torchscript_onnx: - inference_time: 153838.0 - throughput: 6.5003445182594675 + inference_time: 154282.0 + throughput: 6.481637520903281 estimated_peak_memory_range: - min: 17133568 - max: 20356352 + min: 192512 + max: 1913669384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jgdx8nv6p + job_id: jglvy8685 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:23:14Z' + timestamp: '2024-12-11T22:45:34Z' - torchscript_onnx_tflite: - inference_time: 113773.0 - throughput: 8.789431587459239 + inference_time: 112939.0 + throughput: 8.854337297124996 estimated_peak_memory_range: - min: 6148096 - max: 96169696 + min: 5136384 + max: 96014352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: j5mnoe37p + job_id: j5welzwj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 110791.0 - throughput: 9.026003917285701 + inference_time: 110493.0 + throughput: 9.05034708081055 estimated_peak_memory_range: - min: 9867264 - max: 97013792 + min: 9846784 + max: 100597168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: j56yr21np + job_id: jpxklrn15 job_status: Passed torchscript_onnx: - inference_time: 112812.0 - throughput: 8.86430521575719 + inference_time: 113478.0 + throughput: 8.812280794515237 estimated_peak_memory_range: - min: 831488 - max: 425825520 + min: 626688 + max: 424076576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j57yk2dn5 + job_id: j56y8me0p job_status: 
Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:23:15Z' + timestamp: '2024-12-11T22:45:35Z' - torchscript_onnx_tflite: - inference_time: 102108.0 - throughput: 9.79355192541231 + inference_time: 102522.0 + throughput: 9.754004018649656 estimated_peak_memory_range: - min: 4603904 - max: 110392640 + min: 5750784 + max: 111898016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jgn6o03j5 + job_id: jg9lz20vg job_status: Passed torchscript_onnx_qnn: - inference_time: 89253.0 - throughput: 11.204105184139468 + inference_time: 89788.0 + throughput: 11.137345747761394 estimated_peak_memory_range: - min: 9940992 - max: 115417824 + min: 9859072 + max: 116242384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jp3jxnmmg + job_id: j5mn0kqwp job_status: Passed torchscript_onnx: - inference_time: 103520.0 - throughput: 9.659969088098919 + inference_time: 104690.0 + throughput: 9.552010698251982 estimated_peak_memory_range: - min: 14934016 - max: 140022256 + min: 14077952 + max: 139530448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jp4lmnw25 + job_id: jp3jz7vlg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:23:16Z' + timestamp: '2024-12-11T22:45:37Z' - torchscript_onnx_tflite: - inference_time: 157694.0 - throughput: 6.341395360635154 + inference_time: 154231.0 + throughput: 6.483780822273084 estimated_peak_memory_range: - min: 3117056 - max: 492120144 + min: 6680576 + max: 482972608 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jprvo6ekg + job_id: jp14n12lp job_status: Passed torchscript_onnx_qnn: - inference_time: 139616.0 - throughput: 7.162502865001146 + inference_time: 136597.0 + throughput: 7.320804995717329 estimated_peak_memory_range: - min: 10072064 - max: 11334112 + min: 10665984 + max: 12094248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgo2ozv1p + job_id: jgn6zqlr5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,28 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:23:05Z' - - torchscript_onnx_tflite: - inference_time: 7406940.0 - throughput: 0.13500851903755126 - estimated_peak_memory_range: - min: 1634304 - max: 104685408 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 32 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 32 - job_id: jp2k4xl6p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 7399668.0 - throughput: 0.13514119822673126 + timestamp: '2024-12-11T22:45:26Z' + - torchscript_onnx_qnn: + inference_time: 7399753.0 + throughput: 0.13513964587736915 estimated_peak_memory_range: - min: 1548288 - max: 7576640 + min: 5222400 + max: 15353520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +257,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgjvodl1g + job_id: jp2krd04p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +266,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:23:08Z' + timestamp: '2024-12-11T22:45:28Z' - torchscript_onnx_tflite: - inference_time: 154760.0 - throughput: 6.461617989144481 + inference_time: 149359.0 + throughput: 6.695277820553164 estimated_peak_memory_range: - min: 6680576 - max: 252918872 + 
min: 6696960 + max: 257657408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +280,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jpy1qz60p + job_id: j5welz265 job_status: Passed torchscript_onnx_qnn: - inference_time: 145080.0 - throughput: 6.892748828232699 + inference_time: 145443.0 + throughput: 6.875545746443624 estimated_peak_memory_range: - min: 10579968 - max: 11881224 + min: 10272768 + max: 11937560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +295,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jped8ov85 + job_id: jpy1o2r7p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +304,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:23:09Z' + timestamp: '2024-12-11T22:45:29Z' - torchscript_onnx_tflite: - inference_time: 273606.0 - throughput: 3.654890609124069 + inference_time: 273519.0 + throughput: 3.656053144388507 estimated_peak_memory_range: - min: 6668288 - max: 111399056 + min: 6672384 + max: 111020144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +318,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp0zd4r05 + job_id: jg9lz2jlg job_status: Passed torchscript_onnx_qnn: - inference_time: 266119.0 - throughput: 3.7577174121351726 + inference_time: 266203.0 + throughput: 3.756531669440239 estimated_peak_memory_range: - min: 221184 - max: 6227760 + min: 2658304 + max: 8743504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +333,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jgz382745 + job_id: jp0zm9365 job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +342,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:23:10Z' + timestamp: '2024-12-11T22:45:30Z' - torchscript_onnx_tflite: - inference_time: 156964.0 - throughput: 6.370887592059326 + 
inference_time: 166832.0 + throughput: 5.9940538985326555 estimated_peak_memory_range: - min: 3506176 - max: 493974272 + min: 6721536 + max: 479441112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +356,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jp8q627qp + job_id: jp14n1y2p job_status: Passed torchscript_onnx_qnn: - inference_time: 139333.0 - throughput: 7.177050662800629 + inference_time: 145457.0 + throughput: 6.874883986332731 estimated_peak_memory_range: - min: 10104832 - max: 11416696 + min: 10350592 + max: 11490784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +371,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: j5we8w945 + job_id: jp8qer0xp job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +380,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:23:11Z' + timestamp: '2024-12-11T22:45:31Z' - torchscript_onnx_tflite: - inference_time: 303207.0 - throughput: 3.298076891364645 + inference_time: 303278.0 + throughput: 3.2973047830703184 estimated_peak_memory_range: - min: 6696960 - max: 109201488 + min: 6709248 + max: 109490288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +394,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: jgkeovyvg + job_id: jgdxd4eep job_status: Passed torchscript_onnx_qnn: - inference_time: 297898.0 - throughput: 3.356853688175147 + inference_time: 297906.0 + throughput: 3.3567635428625135 estimated_peak_memory_range: - min: 888832 - max: 6558896 + min: 0 + max: 6006976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +409,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jg9lk04mg + job_id: jgke2072g job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +418,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:23:12Z' + 
timestamp: '2024-12-11T22:45:32Z' - torchscript_onnx_tflite: - inference_time: 297003.0 - throughput: 3.36696935721188 + inference_time: 273362.0 + throughput: 3.6581529254248943 estimated_peak_memory_range: - min: 6717440 - max: 102239456 + min: 6651904 + max: 96319040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +432,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 32 - job_id: j5q6z02ep + job_id: j57yen0l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 322428.0 - throughput: 3.10146761447517 + inference_time: 306392.0 + throughput: 3.263792788323455 estimated_peak_memory_range: - min: 5492736 - max: 96045488 + min: 5451776 + max: 100683504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +447,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jp14728np + job_id: j5q6l1e4p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +456,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:23:13Z' + timestamp: '2024-12-11T22:45:33Z' - torchscript_onnx_qnn: - inference_time: 135772.0 - throughput: 7.365288866629349 + inference_time: 135693.0 + throughput: 7.369576912589448 estimated_peak_memory_range: - min: 9863168 - max: 9863168 + min: 9850880 + max: 9850880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,14 +470,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 52 - job_id: jpv6eqwz5 + job_id: jprvld89g job_status: Passed torchscript_onnx: - inference_time: 147629.0 - throughput: 6.773736867417648 + inference_time: 147583.0 + throughput: 6.775848166794279 estimated_peak_memory_range: - min: 56668160 - max: 56668160 + min: 57917440 + max: 57917440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +485,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jpxk39185 + job_id: jgo2lwkxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ 
-509,4 +494,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:23:17Z' + timestamp: '2024-12-11T22:45:38Z' diff --git a/qai_hub_models/models/vit/evaluate.py b/qai_hub_models/models/vit/evaluate.py index bf249af0..3752ac2c 100644 --- a/qai_hub_models/models/vit/evaluate.py +++ b/qai_hub_models/models/vit/evaluate.py @@ -25,7 +25,6 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, - supports_qnn=False, ) args = parser.parse_args() args.device = None diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index 1b369c9f..ef00f146 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -208,7 +208,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser(model_cls=Model) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 3161ed72..3c5749d9 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ b/qai_hub_models/models/vit/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: VIT universal_assets: - torchscript_onnx_tflite: mno35w4kn - torchscript_onnx: mm5d9lw2n + torchscript_onnx_tflite: mq36e04rq + torchscript_onnx: mngg1l3rn performance_metrics: - torchscript_onnx_tflite: - inference_time: 17366.0 - throughput: 57.58378440631118 + inference_time: 17359.0 + throughput: 57.607005011809434 estimated_peak_memory_range: - min: 40960 - max: 43009000 + min: 53248 + max: 48451064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp4lmnd15 + job_id: j56y8my7p + job_status: Passed + torchscript_onnx_qnn: + inference_time: 18009.0 + throughput: 55.52779165972569 + estimated_peak_memory_range: + min: 405504 + max: 
46205616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jgdxd4xlp job_status: Passed torchscript_onnx: - inference_time: 14796.0 - throughput: 67.58583400919167 + inference_time: 14785.0 + throughput: 67.63611768684477 estimated_peak_memory_range: - min: 49152 - max: 202513832 + min: 36864 + max: 202763952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jp1472e7p + job_id: jgke20vog job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -84,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:22:09Z' + timestamp: '2024-12-11T22:44:27Z' - torchscript_onnx_tflite: - inference_time: 12003.0 - throughput: 83.31250520703158 + inference_time: 12025.0 + throughput: 83.16008316008316 estimated_peak_memory_range: - min: 106033152 - max: 165662496 + min: 20480 + max: 65920528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -98,14 +113,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jpxk396l5 + job_id: jp3jz7jzg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 12467.0 + throughput: 80.21175904387583 + estimated_peak_memory_range: + min: 618496 + max: 58240608 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: j57yen2r5 job_status: Passed torchscript_onnx: - inference_time: 10637.0 - throughput: 94.01146939926672 + inference_time: 10665.0 + throughput: 93.76465072667604 estimated_peak_memory_range: min: 0 - max: 106356928 + max: 106707088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jgdx8nozp + job_id: j5q6l10mp job_status: Passed reference_device_info: name: 
Samsung Galaxy S24 @@ -122,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:22:10Z' + timestamp: '2024-12-11T22:44:28Z' - torchscript_onnx_tflite: - inference_time: 11388.0 - throughput: 87.81173164734808 + inference_time: 11354.0 + throughput: 88.07468733485996 estimated_peak_memory_range: - min: 16384 - max: 62283888 + min: 32768 + max: 62944640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -136,14 +166,29 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: j5mnoe69p + job_id: jgo2lw2dp job_status: Passed - torchscript_onnx: - inference_time: 10273.0 - throughput: 97.34254842791785 + torchscript_onnx_qnn: + inference_time: 11652.0 + throughput: 85.82217645039478 estimated_peak_memory_range: min: 0 - max: 76844048 + max: 60127600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jp4ly4nl5 + job_status: Passed + torchscript_onnx: + inference_time: 10422.0 + throughput: 95.95087315294569 + estimated_peak_memory_range: + min: 614400 + max: 77947808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -151,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: j5we8wq45 + job_id: jglvy84l5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -160,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:22:11Z' + timestamp: '2024-12-11T22:44:29Z' - torchscript_onnx_tflite: - inference_time: 16928.0 - throughput: 59.07372400756144 + inference_time: 16995.0 + throughput: 58.84083553986466 estimated_peak_memory_range: - min: 28672 - max: 49022496 + min: 20480 + max: 41682848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -174,7 +219,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jgn6o0mq5 + job_id: jpv6lm6m5 + job_status: 
Passed + torchscript_onnx_qnn: + inference_time: 15778.0 + throughput: 63.37938902268982 + estimated_peak_memory_range: + min: 684032 + max: 1872832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jpxklr995 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -183,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:21:49Z' + timestamp: '2024-12-11T22:44:18Z' - torchscript_onnx_tflite: - inference_time: 258719.0 - throughput: 3.865197376304021 + inference_time: 258724.0 + throughput: 3.8651226789938313 estimated_peak_memory_range: - min: 16384 - max: 60916272 + min: 77824 + max: 61123792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -197,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jprvo627g + job_id: jgjvryv8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 252338.0 + throughput: 3.9629385982293592 + estimated_peak_memory_range: + min: 647168 + max: 11389936 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jgn6zq0m5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -206,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:21:50Z' + timestamp: '2024-12-11T22:44:20Z' - torchscript_onnx_tflite: - inference_time: 17288.0 - throughput: 57.84359093012494 + inference_time: 17126.0 + throughput: 58.39075090505664 estimated_peak_memory_range: - min: 20480 - max: 42824224 + min: 36864 + max: 49925976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -220,7 +295,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp2k4x9qp + job_id: jped7xd05 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15766.0 + throughput: 63.42762907522517 
+ estimated_peak_memory_range: + min: 643072 + max: 2093544 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jprvld6eg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -229,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:21:51Z' + timestamp: '2024-12-11T22:44:21Z' - torchscript_onnx_tflite: - inference_time: 27633.0 - throughput: 36.18861506170159 + inference_time: 27649.0 + throughput: 36.16767333357445 estimated_peak_memory_range: - min: 61440 - max: 55976496 + min: 45056 + max: 57197856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -243,7 +333,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jpy1qzjlp + job_id: jgz3ly365 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 22942.0 + throughput: 43.58817888588615 + estimated_peak_memory_range: + min: 700416 + max: 6905120 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jp2krdxmp job_status: Passed reference_device_info: name: SA8295P ADP @@ -252,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:21:52Z' + timestamp: '2024-12-11T22:44:22Z' - torchscript_onnx_tflite: - inference_time: 17409.0 - throughput: 57.44155321959906 + inference_time: 17312.0 + throughput: 57.7634011090573 estimated_peak_memory_range: - min: 24576 - max: 46243456 + min: 16384 + max: 48204688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -266,7 +371,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp0zd42n5 + job_id: j5welzej5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 15751.0 + throughput: 63.488032505872646 + estimated_peak_memory_range: + min: 679936 + max: 1951248 + primary_compute_unit: NPU + 
precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jpy1o2z4p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -275,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:21:53Z' + timestamp: '2024-12-11T22:44:23Z' - torchscript_onnx_tflite: - inference_time: 24636.0 - throughput: 40.591005033284624 + inference_time: 24662.0 + throughput: 40.548211823858566 estimated_peak_memory_range: - min: 81920 - max: 60995856 + min: 57344 + max: 61647680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -289,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jp8q62mop + job_id: jg9lz2lvg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 22792.0 + throughput: 43.87504387504387 + estimated_peak_memory_range: + min: 651264 + max: 11504928 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: jp0zm94e5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -298,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:21:54Z' + timestamp: '2024-12-11T22:44:25Z' - torchscript_onnx_tflite: - inference_time: 24509.0 - throughput: 40.80133828389571 + inference_time: 24439.0 + throughput: 40.91820450918614 estimated_peak_memory_range: - min: 20480 - max: 56281680 + min: 40960 + max: 61528992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -312,7 +447,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1579 - job_id: jgkeovqng + job_id: jp14n14lp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 23507.0 + throughput: 42.54051984515251 + estimated_peak_memory_range: + min: 0 + max: 55928544 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 902 + job_id: jp8qer28p job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -321,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:21:55Z' - - torchscript_onnx: - inference_time: 20418.0 - throughput: 48.97639337839161 + timestamp: '2024-12-11T22:44:26Z' + - torchscript_onnx_qnn: + inference_time: 16164.0 + throughput: 61.86587478346944 + estimated_peak_memory_range: + min: 602112 + max: 602112 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 902 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 902 + job_id: j5mn0keqp + job_status: Passed + torchscript_onnx: + inference_time: 20503.0 + throughput: 48.77335024142808 estimated_peak_memory_range: - min: 178761728 - max: 178761728 + min: 180137984 + max: 180137984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -335,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 976 - job_id: jg9lk0wmg + job_id: j56y8m27p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -344,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:22:13Z' + timestamp: '2024-12-11T22:44:30Z' diff --git a/qai_hub_models/models/vit_quantized/info.yaml b/qai_hub_models/models/vit_quantized/info.yaml index 48b07d44..2e63bc7a 100644 --- a/qai_hub_models/models/vit_quantized/info.yaml +++ b/qai_hub_models/models/vit_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2010.11929 research_paper_title: 'An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale' diff --git a/qai_hub_models/models/vit_quantized/perf.yaml b/qai_hub_models/models/vit_quantized/perf.yaml index f098d664..47ca4f53 100644 --- a/qai_hub_models/models/vit_quantized/perf.yaml +++ b/qai_hub_models/models/vit_quantized/perf.yaml @@ 
-47,14 +47,14 @@ aggregated: models: - name: VITQuantized universal_assets: - torchscript_onnx: mq8kxzr3q + torchscript_onnx: mn0jxo68m performance_metrics: - torchscript_onnx_qnn: - inference_time: 5271.0 - throughput: 189.7173211914248 + inference_time: 5278.0 + throughput: 189.465706707086 estimated_peak_memory_range: - min: 32768 - max: 108644040 + min: 0 + max: 40754304 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgo2o181p + job_id: j5mn0rr7p job_status: Passed torchscript_onnx: - inference_time: 52299.0 - throughput: 19.120824489952007 + inference_time: 52233.0 + throughput: 19.144984971186798 estimated_peak_memory_range: - min: 5054464 - max: 8181712 + min: 2617344 + max: 6493992 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 1683 - job_id: jpy1q4m0p + job_id: jgo2ly01p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:41:31Z' + timestamp: '2024-12-12T01:10:30Z' - torchscript_onnx_qnn: - inference_time: 3504.0 - throughput: 285.38812785388126 + inference_time: 3531.0 + throughput: 283.2058906825262 estimated_peak_memory_range: min: 12288 - max: 45853456 + max: 45541296 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jpv6e17z5 + job_id: jgn6z22j5 job_status: Passed torchscript_onnx: - inference_time: 37032.0 - throughput: 27.003672499459928 + inference_time: 36756.0 + throughput: 27.206442485580585 estimated_peak_memory_range: - min: 2887680 - max: 407399600 + min: 4046848 + max: 409632240 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 1683 - job_id: jp8q63xqp + 
job_id: jpv6l3oz5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:41:33Z' + timestamp: '2024-12-12T01:10:31Z' - torchscript_onnx_qnn: - inference_time: 3283.0 - throughput: 304.5994517209869 + inference_time: 3265.0 + throughput: 306.2787136294028 estimated_peak_memory_range: min: 0 - max: 65569728 + max: 67146672 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgjvo0q1g + job_id: jprvlkkkg job_status: Passed torchscript_onnx: - inference_time: 40661.0 - throughput: 24.5935909102088 + inference_time: 33562.0 + throughput: 29.79560216911984 estimated_peak_memory_range: - min: 2969600 - max: 156755648 + min: 2437120 + max: 155394016 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 1683 - job_id: jgkeol4vg + job_id: jgjvrxm1g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:41:35Z' + timestamp: '2024-12-12T01:10:33Z' - torchscript_onnx_qnn: - inference_time: 21635.0 - throughput: 46.22140050843541 + inference_time: 22553.0 + throughput: 44.339999113200015 estimated_peak_memory_range: - min: 217088 - max: 8326288 + min: 212992 + max: 12419568 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 902 - job_id: jped8ry85 + job_id: jp2kr886p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -185,13 +185,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:41:15Z' + timestamp: '2024-12-12T01:10:12Z' - torchscript_onnx_qnn: - inference_time: 4739.0 - throughput: 
211.01498206372653 + inference_time: 4724.0 + throughput: 211.68501270110076 estimated_peak_memory_range: - min: 184320 - max: 2138112 + min: 212992 + max: 1880528 primary_compute_unit: NPU precision: int8 layer_info: @@ -199,7 +199,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: j5we8d445 + job_id: jpy1oee0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -208,13 +208,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:41:16Z' + timestamp: '2024-12-12T01:10:14Z' - torchscript_onnx_qnn: - inference_time: 38830.0 - throughput: 25.753283543651815 + inference_time: 38793.0 + throughput: 25.77784651870183 estimated_peak_memory_range: min: 176128 - max: 5932304 + max: 10349888 primary_compute_unit: NPU precision: int8 layer_info: @@ -222,7 +222,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgdx8r26p + job_id: jp8qeo9qp job_status: Passed reference_device_info: name: SA7255P ADP @@ -231,13 +231,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:41:20Z' + timestamp: '2024-12-12T01:10:18Z' - torchscript_onnx_qnn: - inference_time: 4795.0 - throughput: 208.55057351407717 + inference_time: 4772.0 + throughput: 209.55574182732607 estimated_peak_memory_range: - min: 172032 - max: 1414344 + min: 200704 + max: 1332000 primary_compute_unit: NPU precision: int8 layer_info: @@ -245,7 +245,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: j57ykj9n5 + job_id: jgke2znvg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -254,13 +254,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:41:22Z' + timestamp: '2024-12-12T01:10:20Z' - torchscript_onnx_qnn: - inference_time: 6823.0 - throughput: 146.5630954125751 + inference_time: 6861.0 + throughput: 145.75134819997086 estimated_peak_memory_range: - min: 204800 - max: 6002720 
+ min: 196608 + max: 6296240 primary_compute_unit: NPU precision: int8 layer_info: @@ -268,7 +268,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jpxk37x85 + job_id: j5q6l8kep job_status: Passed reference_device_info: name: SA8295P ADP @@ -277,13 +277,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:41:24Z' + timestamp: '2024-12-12T01:10:21Z' - torchscript_onnx_qnn: - inference_time: 4771.0 - throughput: 209.5996646405366 + inference_time: 4743.0 + throughput: 210.8370229812355 estimated_peak_memory_range: - min: 196608 - max: 1482656 + min: 180224 + max: 1819800 primary_compute_unit: NPU precision: int8 layer_info: @@ -291,7 +291,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: j5mnow87p + job_id: jglvynz25 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -300,13 +300,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:41:25Z' + timestamp: '2024-12-12T01:10:23Z' - torchscript_onnx_qnn: - inference_time: 6279.0 - throughput: 159.26102882624622 + inference_time: 6287.0 + throughput: 159.0583744234134 estimated_peak_memory_range: - min: 167936 - max: 6220752 + min: 163840 + max: 10676976 primary_compute_unit: NPU precision: int8 layer_info: @@ -314,7 +314,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jgn6o9kj5 + job_id: j56y86jnp job_status: Passed reference_device_info: name: SA8775P ADP @@ -323,13 +323,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:41:27Z' + timestamp: '2024-12-12T01:10:25Z' - torchscript_onnx_qnn: - inference_time: 5993.0 - throughput: 166.86133822793258 + inference_time: 6185.0 + throughput: 161.68148746968473 estimated_peak_memory_range: min: 12288 - max: 48603904 + max: 50574464 primary_compute_unit: NPU precision: int8 layer_info: @@ -337,7 +337,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 903 - job_id: jp2k47e6p + job_id: jp3jzk3mg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -346,13 +346,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:41:29Z' + timestamp: '2024-12-12T01:10:27Z' - torchscript_onnx_qnn: - inference_time: 5136.0 - throughput: 194.70404984423675 + inference_time: 5161.0 + throughput: 193.7608990505716 estimated_peak_memory_range: - min: 167936 - max: 167936 + min: 172032 + max: 172032 primary_compute_unit: NPU precision: int8 layer_info: @@ -360,14 +360,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 903 - job_id: jg9lk3dmg + job_id: jp0zmyq05 job_status: Passed torchscript_onnx: - inference_time: 72436.0 - throughput: 13.805290187199734 + inference_time: 72286.0 + throughput: 13.833937415267133 estimated_peak_memory_range: - min: 88985600 - max: 88985600 + min: 90005504 + max: 90005504 primary_compute_unit: NPU precision: int8 layer_info: @@ -375,7 +375,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 26 total_layers: 1683 - job_id: j5q6z7yep + job_id: jped79185 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -384,4 +384,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:41:37Z' + timestamp: '2024-12-12T01:10:35Z' diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index be20cb40..237a306e 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -31,7 +31,7 @@ export_without_hub_access, ) -ALL_COMPONENTS = ["WhisperEncoder", "WhisperDecoder"] +ALL_COMPONENTS = ["WhisperDecoder", "WhisperEncoder"] def export_model( @@ -123,10 +123,10 @@ def export_model( # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) components_dict: dict[str, BaseModel] = {} - if "WhisperEncoder" in components: - components_dict["WhisperEncoder"] = model.encoder # type: ignore if "WhisperDecoder" in components: components_dict["WhisperDecoder"] = model.decoder # type: ignore + if "WhisperEncoder" in components: + components_dict["WhisperEncoder"] = model.encoder # type: ignore compile_jobs: dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml index 9893d65c..97792afb 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -42,55 +42,55 @@ aggregated: - SA8295P - SA8650P Proxy models: -- name: WhisperEncoder +- name: WhisperDecoder universal_assets: - torchscript_onnx_tflite: mnjxw9w1q - torchscript_onnx: mmr67x7rm + torchscript_onnx_tflite: mn0jx42zm + torchscript_onnx: mno63ogkn performance_metrics: - torchscript_onnx_tflite: - inference_time: 199951.0 - throughput: 5.0012253001985485 + inference_time: 37916.0 + throughput: 26.37409009389176 estimated_peak_memory_range: - min: 12288 - max: 85364312 - primary_compute_unit: GPU + min: 5763072 + max: 45167296 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jp8q62qop + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jp4lye0l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 269428.0 - throughput: 3.71156672654661 + inference_time: 4223.0 + throughput: 236.79848448969926 estimated_peak_memory_range: - min: 143360 - max: 84978624 + min: 21229568 + max: 66509536 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + 
layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jp2k4xxqp + total_layers: 821 + job_id: jg9lzoevg job_status: Passed torchscript_onnx: - inference_time: 272557.0 - throughput: 3.668957319019508 + inference_time: 15843.0 + throughput: 63.119358707315534 estimated_peak_memory_range: - min: 63717376 - max: 124432096 + min: 65536 + max: 121972776 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 380 + layers_on_npu: 844 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 380 - job_id: jp2k4x0qp + total_layers: 844 + job_id: jgz3lyd65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,51 +99,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:21:03Z' + timestamp: '2024-12-11T22:43:22Z' - torchscript_onnx_tflite: - inference_time: 153295.0 - throughput: 6.523369972928014 + inference_time: 32580.0 + throughput: 30.693677102516883 estimated_peak_memory_range: - min: 38887424 - max: 79113552 - primary_compute_unit: GPU + min: 3858432 + max: 85679120 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: j5q6z06op + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: j5mn09yqp job_status: Passed torchscript_onnx_qnn: - inference_time: 184254.0 - throughput: 5.427290587992663 + inference_time: 3296.0 + throughput: 303.3980582524272 estimated_peak_memory_range: - min: 0 - max: 303613280 + min: 21213184 + max: 79093088 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jp0zd44n5 + total_layers: 821 + job_id: jgdxd6llp job_status: Passed torchscript_onnx: - inference_time: 206768.0 - throughput: 4.836338311537569 + inference_time: 13657.0 + throughput: 73.22252324815113 estimated_peak_memory_range: - min: 41893888 - 
max: 974247280 + min: 0 + max: 232413648 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 380 + layers_on_npu: 844 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 380 - job_id: jp0zd43n5 + total_layers: 844 + job_id: jg9lz2nvg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,51 +152,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:21:05Z' + timestamp: '2024-12-11T22:43:24Z' - torchscript_onnx_tflite: - inference_time: 145127.0 - throughput: 6.890516582028154 + inference_time: 27503.0 + throughput: 36.359669854197726 estimated_peak_memory_range: - min: 39153664 - max: 59471248 - primary_compute_unit: GPU + min: 4235264 + max: 79486448 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jp3jxnnng + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jprvlxjeg job_status: Passed torchscript_onnx_qnn: - inference_time: 173991.0 - throughput: 5.7474237173187115 + inference_time: 2646.0 + throughput: 377.92894935752076 estimated_peak_memory_range: - min: 0 - max: 319212800 + min: 9228288 + max: 61817344 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jgkeovvng + total_layers: 821 + job_id: jp4ly4rl5 job_status: Passed torchscript_onnx: - inference_time: 181021.0 - throughput: 5.524220946740986 + inference_time: 12074.0 + throughput: 82.8225940036442 estimated_peak_memory_range: - min: 99790848 - max: 668658752 + min: 5988352 + max: 213328736 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 380 + layers_on_npu: 844 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 380 - job_id: jgkeov7ng + total_layers: 844 + job_id: jgdxd41lp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ 
-205,36 +205,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:21:08Z' + timestamp: '2024-12-11T22:43:26Z' - torchscript_onnx_tflite: - inference_time: 197727.0 - throughput: 5.057478240199871 + inference_time: 38524.0 + throughput: 25.957844460595993 estimated_peak_memory_range: - min: 0 - max: 80630072 - primary_compute_unit: GPU + min: 5783552 + max: 44035000 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jpv6eqqr5 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jpy1o804p job_status: Passed torchscript_onnx_qnn: - inference_time: 205013.0 - throughput: 4.877739460424461 + inference_time: 4167.0 + throughput: 239.98080153587713 estimated_peak_memory_range: - min: 708608 - max: 1970696 + min: 19914752 + max: 21185208 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jglvo46m5 + total_layers: 821 + job_id: j5mn0kxqp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,36 +243,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:20:47Z' + timestamp: '2024-12-11T22:43:05Z' - torchscript_onnx_tflite: - inference_time: 1157378.0 - throughput: 0.8640219530697836 + inference_time: 67034.0 + throughput: 14.917802905988006 estimated_peak_memory_range: - min: 38821888 - max: 65750624 - primary_compute_unit: GPU + min: 4333568 + max: 79344400 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jped8oov5 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jp8qejv8p job_status: Passed torchscript_onnx_qnn: - inference_time: 935220.0 - throughput: 1.0692671243129959 + 
inference_time: 26930.0 + throughput: 37.13330857779428 estimated_peak_memory_range: - min: 704512 - max: 8000384 + min: 9551872 + max: 20319616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jpv6eq0r5 + total_layers: 821 + job_id: jpy1o234p job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,36 +281,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:20:51Z' + timestamp: '2024-12-11T22:43:10Z' - torchscript_onnx_tflite: - inference_time: 196948.0 - throughput: 5.077482381136138 + inference_time: 38064.0 + throughput: 26.271542664985287 estimated_peak_memory_range: - min: 12288 - max: 114774056 - primary_compute_unit: GPU + min: 5779456 + max: 44356272 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: j5we8wwm5 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: j5q6l4omp job_status: Passed torchscript_onnx_qnn: - inference_time: 220231.0 - throughput: 4.540686824289042 + inference_time: 4245.0 + throughput: 235.57126030624264 estimated_peak_memory_range: - min: 733184 - max: 1949712 + min: 19931136 + max: 21179696 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jped8oev5 + total_layers: 821 + job_id: jp8qery8p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,36 +319,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:20:53Z' + timestamp: '2024-12-11T22:43:12Z' - torchscript_onnx_tflite: - inference_time: 205030.0 - throughput: 4.877335024142808 + inference_time: 40976.0 + throughput: 24.404529480671613 estimated_peak_memory_range: - min: 40185856 - max: 65787184 - 
primary_compute_unit: GPU + min: 2265088 + max: 70310608 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jp147227p + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: j56y8ol7p job_status: Passed torchscript_onnx_qnn: - inference_time: 219872.0 - throughput: 4.548100713142192 + inference_time: 5535.0 + throughput: 180.6684733514002 estimated_peak_memory_range: - min: 712704 - max: 6642320 + min: 18452480 + max: 24679728 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: j5we8w2m5 + total_layers: 821 + job_id: j5q6l1qmp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,36 +357,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:20:55Z' + timestamp: '2024-12-11T22:43:14Z' - torchscript_onnx_tflite: - inference_time: 198057.0 - throughput: 5.049051535669024 + inference_time: 38218.0 + throughput: 26.16568109267884 estimated_peak_memory_range: - min: 12288 - max: 82673592 - primary_compute_unit: GPU + min: 5779456 + max: 45519008 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: j57yk2295 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jgo2ldqdp job_status: Passed torchscript_onnx_qnn: - inference_time: 202554.0 - throughput: 4.9369550835826495 + inference_time: 4172.0 + throughput: 239.69319271332694 estimated_peak_memory_range: - min: 167936 - max: 21255528 + min: 21241856 + max: 22798904 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jgdx8nezp + total_layers: 821 + job_id: j56y8m47p job_status: Passed 
reference_device_info: name: SA8650 (Proxy) @@ -395,36 +395,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:20:57Z' + timestamp: '2024-12-11T22:43:16Z' - torchscript_onnx_tflite: - inference_time: 366822.0 - throughput: 2.726117844622187 + inference_time: 38342.0 + throughput: 26.08105993427573 estimated_peak_memory_range: - min: 40251392 - max: 66235984 - primary_compute_unit: GPU + min: 5636096 + max: 80440016 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jpxk399l5 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jgjvr348g job_status: Passed torchscript_onnx_qnn: - inference_time: 194703.0 - throughput: 5.136027693461323 + inference_time: 5638.0 + throughput: 177.367860943597 estimated_peak_memory_range: - min: 720896 - max: 6542096 + min: 18460672 + max: 29231088 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jpxk39nl5 + total_layers: 821 + job_id: jgo2lw6dp job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,36 +433,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:20:59Z' + timestamp: '2024-12-11T22:43:18Z' - torchscript_onnx_tflite: - inference_time: 324843.0 - throughput: 3.078410185843623 + inference_time: 42473.0 + throughput: 23.544369364066583 estimated_peak_memory_range: - min: 12320768 - max: 58238672 - primary_compute_unit: GPU + min: 2985984 + max: 78536672 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 408 - layers_on_cpu: 11 - total_layers: 419 - job_id: jgn6o00q5 + layers_on_npu: 983 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 983 + job_id: jgz3lzk65 job_status: Passed torchscript_onnx_qnn: - inference_time: 272669.0 - 
throughput: 3.6674502785428484 + inference_time: 5146.0 + throughput: 194.32568985619898 estimated_peak_memory_range: - min: 0 - max: 313784752 + min: 21233664 + max: 74169440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jgn6o0lq5 + total_layers: 821 + job_id: jgjvryn8g job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,36 +471,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:21:02Z' + timestamp: '2024-12-11T22:43:20Z' - torchscript_onnx_qnn: - inference_time: 159232.0 - throughput: 6.280144694533762 + inference_time: 3907.0 + throughput: 255.9508574353724 estimated_peak_memory_range: - min: 491520 - max: 491520 + min: 21233664 + max: 21233664 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 531 + layers_on_npu: 821 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 531 - job_id: jp3jxnvng + total_layers: 821 + job_id: jprvld3eg job_status: Passed torchscript_onnx: - inference_time: 297710.0 - throughput: 3.358973497699103 + inference_time: 14267.0 + throughput: 70.09182028457279 estimated_peak_memory_range: - min: 139747328 - max: 139747328 + min: 112173056 + max: 112173056 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 380 + layers_on_npu: 844 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 380 - job_id: jglvo42m5 + total_layers: 844 + job_id: jp4ly4ll5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,56 +509,56 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:21:10Z' -- name: WhisperDecoder + timestamp: '2024-12-11T22:43:28Z' +- name: WhisperEncoder universal_assets: - torchscript_onnx_tflite: mq8k1o7pq - torchscript_onnx: mqpz04y0n + torchscript_onnx_tflite: mn7xl95oq + torchscript_onnx: mmr36ox2m performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 38364.0 - throughput: 26.066103638828068 + inference_time: 196633.0 + throughput: 5.085616351273693 estimated_peak_memory_range: - min: 3129344 - max: 43493272 - primary_compute_unit: NPU + min: 36651008 + max: 114839456 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jgkeoveng + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jpxkl0295 job_status: Passed torchscript_onnx_qnn: - inference_time: 4236.0 - throughput: 236.07176581680832 + inference_time: 266434.0 + throughput: 3.753274732203848 estimated_peak_memory_range: - min: 21233664 - max: 67939336 + min: 24576 + max: 81057032 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jpy1qzzlp + total_layers: 531 + job_id: jp14noxlp job_status: Passed torchscript_onnx: - inference_time: 16021.0 - throughput: 62.41807627488921 + inference_time: 300379.0 + throughput: 3.329127535546759 estimated_peak_memory_range: - min: 16384 - max: 122561352 + min: 95383552 + max: 99067320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 844 + layers_on_npu: 380 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 844 - job_id: jpy1qzrlp + total_layers: 380 + job_id: j5welz6j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -567,51 +567,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:21:04Z' + timestamp: '2024-12-11T22:43:23Z' - torchscript_onnx_tflite: - inference_time: 30646.0 - throughput: 32.63068589701756 + inference_time: 157913.0 + throughput: 6.332600862500238 estimated_peak_memory_range: - min: 16384 - max: 79497280 - primary_compute_unit: NPU + min: 38572032 + max: 80523552 + primary_compute_unit: GPU precision: fp16 layer_info: - 
layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: j56yr22yp + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jgn6z18m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3339.0 - throughput: 299.4908655286014 + inference_time: 186485.0 + throughput: 5.362361584041612 estimated_peak_memory_range: - min: 0 - max: 53908400 + min: 532480 + max: 300517680 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jp8q622op + total_layers: 531 + job_id: j57yenrr5 job_status: Passed torchscript_onnx: - inference_time: 13615.0 - throughput: 73.44840249724568 + inference_time: 211549.0 + throughput: 4.727037234872299 estimated_peak_memory_range: - min: 55476224 - max: 290099120 + min: 98062336 + max: 1029740864 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 844 + layers_on_npu: 380 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 844 - job_id: jp8q620op + total_layers: 380 + job_id: jp14n1zlp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -620,51 +620,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:21:06Z' + timestamp: '2024-12-11T22:43:24Z' - torchscript_onnx_tflite: - inference_time: 26759.0 - throughput: 37.37060428267125 + inference_time: 140336.0 + throughput: 7.125755330064987 estimated_peak_memory_range: - min: 4579328 - max: 78907952 - primary_compute_unit: NPU + min: 38686720 + max: 63279152 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jgo2ozzkp + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jp2kronmp job_status: Passed torchscript_onnx_qnn: - inference_time: 2558.0 - throughput: 390.93041438623925 + inference_time: 152984.0 + 
throughput: 6.536631281702662 estimated_peak_memory_range: - min: 21209088 - max: 72507968 + min: 0 + max: 318748224 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: j5q6z00op + total_layers: 531 + job_id: jpxklro95 job_status: Passed torchscript_onnx: - inference_time: 12032.0 - throughput: 83.11170212765957 + inference_time: 183176.0 + throughput: 5.4592304668733895 estimated_peak_memory_range: - min: 29728768 - max: 239746208 + min: 101126144 + max: 670683664 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 844 + layers_on_npu: 380 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 844 - job_id: j5q6z0eop + total_layers: 380 + job_id: j57yenyr5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -673,36 +673,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:21:08Z' + timestamp: '2024-12-11T22:43:26Z' - torchscript_onnx_tflite: - inference_time: 38530.0 - throughput: 25.95380223202699 + inference_time: 196378.0 + throughput: 5.092220106121867 estimated_peak_memory_range: - min: 20480 - max: 40651136 - primary_compute_unit: NPU + min: 35803136 + max: 115196272 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jgjvoddeg + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jp0zmo7e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4204.0 - throughput: 237.8686964795433 + inference_time: 199737.0 + throughput: 5.006583657509625 estimated_peak_memory_range: - min: 21307392 - max: 22716576 + min: 335872 + max: 21543856 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: j56yr2eyp + total_layers: 531 + job_id: 
jgn6zqvm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -711,36 +711,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:20:48Z' + timestamp: '2024-12-11T22:43:06Z' - torchscript_onnx_tflite: - inference_time: 68010.0 - throughput: 14.703720041170417 + inference_time: 1160066.0 + throughput: 0.8620199195563011 estimated_peak_memory_range: - min: 3158016 - max: 77148144 - primary_compute_unit: NPU + min: 33222656 + max: 58380224 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jgz3822x5 + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jgke26mog job_status: Passed torchscript_onnx_qnn: - inference_time: 26944.0 - throughput: 37.11401425178147 + inference_time: 935326.0 + throughput: 1.0691459448363458 estimated_peak_memory_range: - min: 21262336 - max: 27147872 + min: 634880 + max: 11267152 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jgjvodzeg + total_layers: 531 + job_id: jp0zm90e5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -749,36 +749,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:20:51Z' + timestamp: '2024-12-11T22:43:10Z' - torchscript_onnx_tflite: - inference_time: 38528.0 - throughput: 25.95514950166113 + inference_time: 200355.0 + throughput: 4.991140725212747 estimated_peak_memory_range: - min: 5840896 - max: 46172720 - primary_compute_unit: NPU + min: 35786752 + max: 128931816 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jg9lk008g + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jglvywrl5 job_status: Passed 
torchscript_onnx_qnn: - inference_time: 4237.0 - throughput: 236.0160490913382 + inference_time: 217297.0 + throughput: 4.601996346014901 estimated_peak_memory_range: - min: 21266432 - max: 22560600 + min: 917504 + max: 2165496 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jgz382ox5 + total_layers: 531 + job_id: jgke20xog job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -787,36 +787,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:20:54Z' + timestamp: '2024-12-11T22:43:12Z' - torchscript_onnx_tflite: - inference_time: 41065.0 - throughput: 24.351637647631804 + inference_time: 204198.0 + throughput: 4.897207612219512 estimated_peak_memory_range: - min: 5787648 - max: 73522512 - primary_compute_unit: NPU + min: 40148992 + max: 69120864 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jgdx8nnzp + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jp3jzo2zg job_status: Passed torchscript_onnx_qnn: - inference_time: 5798.0 - throughput: 172.47326664367023 + inference_time: 219469.0 + throughput: 4.556452164086956 estimated_peak_memory_range: - min: 18452480 - max: 24358528 + min: 749568 + max: 7053184 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jg9lk0j8g + total_layers: 531 + job_id: jglvy8ml5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -825,36 +825,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:20:56Z' + timestamp: '2024-12-11T22:43:14Z' - torchscript_onnx_tflite: - inference_time: 38144.0 - throughput: 26.216442953020135 + inference_time: 200209.0 + throughput: 
4.9947804544251255 estimated_peak_memory_range: - min: 4091904 - max: 42344720 - primary_compute_unit: NPU + min: 40960 + max: 81838200 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jp4lmnn15 + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jpv6l2xm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4157.0 - throughput: 240.55809477988933 + inference_time: 213794.0 + throughput: 4.6773997399365745 estimated_peak_memory_range: - min: 22016000 - max: 23390080 + min: 253952 + max: 11342856 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jp4lmnk15 + total_layers: 531 + job_id: jp3jz70zg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -863,36 +863,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:20:58Z' + timestamp: '2024-12-11T22:43:16Z' - torchscript_onnx_tflite: - inference_time: 38291.0 - throughput: 26.11579744587501 + inference_time: 367334.0 + throughput: 2.722318108315593 estimated_peak_memory_range: - min: 5763072 - max: 79280240 - primary_compute_unit: NPU + min: 39268352 + max: 67576816 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: j5mnoee9p + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: jped76305 job_status: Passed torchscript_onnx_qnn: - inference_time: 5574.0 - throughput: 179.4043774668102 + inference_time: 195132.0 + throughput: 5.1247360760920815 estimated_peak_memory_range: - min: 18505728 - max: 24312816 + min: 520192 + max: 6546704 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - 
job_id: j5mnoeq9p + total_layers: 531 + job_id: jpv6lmkm5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -901,36 +901,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:21:00Z' + timestamp: '2024-12-11T22:43:18Z' - torchscript_onnx_tflite: - inference_time: 43741.0 - throughput: 22.861845865435175 + inference_time: 284098.0 + throughput: 3.519912142992911 estimated_peak_memory_range: - min: 1773568 - max: 80616576 - primary_compute_unit: NPU + min: 40390656 + max: 87126816 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 983 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 983 - job_id: jprvo667g + layers_on_npu: 0 + layers_on_gpu: 408 + layers_on_cpu: 11 + total_layers: 419 + job_id: j5welynj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5068.0 - throughput: 197.3164956590371 + inference_time: 283131.0 + throughput: 3.5319339810900257 estimated_peak_memory_range: - min: 19025920 - max: 69441120 + min: 0 + max: 311531520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jprvo687g + total_layers: 531 + job_id: jped7xm05 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -939,36 +939,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:21:02Z' + timestamp: '2024-12-11T22:43:20Z' - torchscript_onnx_qnn: - inference_time: 3805.0 - throughput: 262.8120893561104 + inference_time: 160208.0 + throughput: 6.241885548786578 estimated_peak_memory_range: - min: 21233664 - max: 21233664 + min: 483328 + max: 483328 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 821 + layers_on_npu: 531 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 821 - job_id: jgo2ozkkp + total_layers: 531 + job_id: jp2krdymp job_status: Passed torchscript_onnx: - inference_time: 14392.0 - throughput: 
69.48304613674263 + inference_time: 297781.0 + throughput: 3.358172616788848 estimated_peak_memory_range: - min: 112254976 - max: 112254976 + min: 139743232 + max: 139743232 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 844 + layers_on_npu: 380 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 844 - job_id: j56yr2zyp + total_layers: 380 + job_id: jpxklrk95 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -977,4 +977,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:21:10Z' + timestamp: '2024-12-11T22:43:28Z' diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py deleted file mode 100644 index 0144a5ec..00000000 --- a/qai_hub_models/models/whisper_small_en/export.py +++ /dev/null @@ -1,247 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
- - -from __future__ import annotations - -import os -import warnings -from collections.abc import Mapping -from pathlib import Path -from typing import Any, Optional, cast - -import qai_hub as hub -import torch - -from qai_hub_models.models.common import ExportResult, TargetRuntime -from qai_hub_models.models.whisper_small_en import Model -from qai_hub_models.utils.args import export_parser, get_model_kwargs -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.compare import torch_inference -from qai_hub_models.utils.input_spec import make_torch_inputs -from qai_hub_models.utils.printing import ( - print_inference_metrics, - print_profile_metrics_from_job, -) -from qai_hub_models.utils.qai_hub_helpers import ( - can_access_qualcomm_ai_hub, - export_without_hub_access, -) - -ALL_COMPONENTS = ["WhisperEncoder", "WhisperDecoder"] - - -def export_model( - device: Optional[str] = None, - chipset: Optional[str] = None, - components: Optional[list[str]] = None, - skip_profiling: bool = False, - skip_inferencing: bool = False, - skip_downloading: bool = False, - skip_summary: bool = False, - output_dir: Optional[str] = None, - target_runtime: TargetRuntime = TargetRuntime.TFLITE, - compile_options: str = "", - profile_options: str = "", - **additional_model_kwargs, -) -> Mapping[str, ExportResult] | list[str]: - """ - This function executes the following recipe: - - 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - 2. Compiles the model to an asset that can be run on device - 3. Profiles the model performance on a real device - 4. Inferences the model on sample inputs - 5. Downloads the model asset to the local directory - 6. Summarizes the results from profiling and inference - - Each of the last 4 steps can be optionally skipped using the input options. - - Parameters: - device: Device for which to export the model. - Full list of available devices can be found by running `hub.get_devices()`. 
- Defaults to DEFAULT_DEVICE if not specified. - chipset: If set, will choose a random device with this chipset. - Overrides the `device` argument. - components: List of sub-components of the model that will be exported. - Each component is compiled and profiled separately. - Defaults to ALL_COMPONENTS if not specified. - skip_profiling: If set, skips profiling of compiled model on real devices. - skip_inferencing: If set, skips computing on-device outputs from sample data. - skip_downloading: If set, skips downloading of compiled model. - skip_summary: If set, skips waiting for and summarizing results - from profiling and inference. - output_dir: Directory to store generated assets (e.g. compiled model). - Defaults to `/build/`. - target_runtime: Which on-device runtime to target. Default is TFLite. - compile_options: Additional options to pass when submitting the compile job. - profile_options: Additional options to pass when submitting the profile job. - **additional_model_kwargs: Additional optional kwargs used to customize - `model_cls.from_pretrained` - - Returns: - A Mapping from component_name to a struct of: - * A CompileJob object containing metadata about the compile job submitted to hub. - * An InferenceJob containing metadata about the inference job (None if inferencing skipped). - * A ProfileJob containing metadata about the profile job (None if profiling skipped). 
- """ - model_name = "whisper_small_en" - output_path = Path(output_dir or Path.cwd() / "build" / model_name) - if not device and not chipset: - raise ValueError("Device or Chipset must be provided.") - hub_device = hub.Device( - name=device or "", attributes=f"chipset:{chipset}" if chipset else None - ) - component_arg = components - components = components or ALL_COMPONENTS - for component_name in components: - if component_name not in ALL_COMPONENTS: - raise ValueError(f"Invalid component {component_name}.") - if not can_access_qualcomm_ai_hub(): - return export_without_hub_access( - "whisper_small_en", - "Whisper-Small-En", - device or f"Device (Chipset {chipset})", - skip_profiling, - skip_inferencing, - skip_downloading, - skip_summary, - output_path, - target_runtime, - compile_options, - profile_options, - component_arg, - ) - - # On-device perf improves with I/O in channel_last format for runtimes - # that execute natively in channel_last format. - use_channel_last_format = target_runtime.channel_last_native_execution - - # 1. Instantiates a PyTorch model and converts it to a traced TorchScript format - model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) - components_dict: dict[str, BaseModel] = {} - if "WhisperEncoder" in components: - components_dict["WhisperEncoder"] = model.encoder # type: ignore - if "WhisperDecoder" in components: - components_dict["WhisperDecoder"] = model.decoder # type: ignore - - compile_jobs: dict[str, hub.client.CompileJob] = {} - for component_name, component in components_dict.items(): - input_spec = component.get_input_spec() - - # Trace the model - source_model = torch.jit.trace( - component.to("cpu"), make_torch_inputs(input_spec) - ) - - # 2. 
Compiles the model to an asset that can be run on device - model_compile_options = component.get_hub_compile_options( - target_runtime, compile_options, hub_device - ) - print(f"Optimizing model {component_name} to run on-device") - submitted_compile_job = hub.submit_compile_job( - model=source_model, - input_specs=input_spec, - device=hub_device, - name=f"{model_name}_{component_name}", - options=model_compile_options, - ) - compile_jobs[component_name] = cast( - hub.client.CompileJob, submitted_compile_job - ) - - # 3. Profiles the model performance on a real device - profile_jobs: dict[str, hub.client.ProfileJob] = {} - if not skip_profiling: - for component_name in components: - profile_options_all = components_dict[ - component_name - ].get_hub_profile_options(target_runtime, profile_options) - print(f"Profiling model {component_name} on a hosted device.") - submitted_profile_job = hub.submit_profile_job( - model=compile_jobs[component_name].get_target_model(), - device=hub_device, - name=f"{model_name}_{component_name}", - options=profile_options_all, - ) - profile_jobs[component_name] = cast( - hub.client.ProfileJob, submitted_profile_job - ) - - # 4. Inferences the model on sample inputs - inference_jobs: dict[str, hub.client.InferenceJob] = {} - if not skip_inferencing: - for component_name in components: - print( - f"Running inference for {component_name} on a hosted device with example inputs." 
- ) - profile_options_all = components_dict[ - component_name - ].get_hub_profile_options(target_runtime, profile_options) - sample_inputs = components_dict[component_name].sample_inputs( - use_channel_last_format=use_channel_last_format - ) - submitted_inference_job = hub.submit_inference_job( - model=compile_jobs[component_name].get_target_model(), - inputs=sample_inputs, - device=hub_device, - name=f"{model_name}_{component_name}", - options=profile_options_all, - ) - inference_jobs[component_name] = cast( - hub.client.InferenceJob, submitted_inference_job - ) - - # 5. Downloads the model asset to the local directory - if not skip_downloading: - os.makedirs(output_path, exist_ok=True) - for component_name, compile_job in compile_jobs.items(): - target_model: hub.Model = compile_job.get_target_model() # type: ignore - target_model.download(str(output_path / component_name)) - - # 6. Summarizes the results from profiling and inference - if not skip_summary and not skip_profiling: - for component_name in components: - profile_job = profile_jobs[component_name] - assert profile_job is not None and profile_job.wait().success - profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore - print_profile_metrics_from_job(profile_job, profile_data) - - if not skip_summary and not skip_inferencing: - for component_name, component in components_dict.items(): - inference_job = inference_jobs[component_name] - sample_inputs = component.sample_inputs(use_channel_last_format=False) - torch_out = torch_inference( - component, - sample_inputs, - return_channel_last_output=use_channel_last_format, - ) - assert inference_job is not None and inference_job.wait().success - inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore - - print_inference_metrics( - inference_job, inference_result, torch_out, component.get_output_names() - ) - - return { - component_name: ExportResult( - compile_job=compile_jobs[component_name], - 
inference_job=inference_jobs.get(component_name, None), - profile_job=profile_jobs.get(component_name, None), - ) - for component_name in components - } - - -def main(): - warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, components=ALL_COMPONENTS) - args = parser.parse_args() - export_model(**vars(args)) - - -if __name__ == "__main__": - main() diff --git a/qai_hub_models/models/whisper_small_en/info.yaml b/qai_hub_models/models/whisper_small_en/info.yaml deleted file mode 100644 index c360f5bf..00000000 --- a/qai_hub_models/models/whisper_small_en/info.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Whisper-Small-En -# id must match with the model dir name in qai_hub_models -id: whisper_small_en -status: public -headline: Automatic speech recognition (ASR) model for English transcription as well - as translation. -domain: Audio -description: OpenAI’s Whisper ASR (Automatic Speech Recognition) model is a state-of-the-art system designed for transcribing spoken language into written text. It exhibits robust performance in realistic, noisy environments, making it highly reliable for real-world applications. Specifically, it excels in long-form transcription, capable of accurately transcribing audio clips up to 30 seconds long. Time to the first token is the encoder's latency, while time to each additional token is decoder's latency, where we assume a mean decoded length specified below. 
-use_case: Speech Recognition -tags: - - foundation -research_paper: https://cdn.openai.com/papers/whisper.pdf -research_paper_title: Robust Speech Recognition via Large-Scale Weak Supervision -license: https://github.com/openai/whisper/blob/main/LICENSE -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf -source_repo: https://github.com/openai/whisper/tree/main -technical_details: - Model checkpoint: small.en - Input resolution: 80x3000 (30 seconds audio) - Mean decoded sequence length: 112 tokens - Number of parameters (WhisperEncoder): 102M - Model size (WhisperEncoder): 390 MB - Number of parameters (WhisperDecoder): 139M - Model size (WhisperDecoder): 531 MB -applicable_scenarios: - - Smart Home - - Accessibility -related_models: - - whisper_tiny_en - - whisper_base_en - - huggingface_wavlm_base_plus -form_factors: - - Phone - - Tablet - - IoT -has_static_banner: true -has_animated_banner: true -license_type: mit -deploy_license_type: AI Model Hub License -dataset: [] diff --git a/qai_hub_models/models/whisper_small_en/model.py b/qai_hub_models/models/whisper_small_en/model.py deleted file mode 100644 index 54433dc6..00000000 --- a/qai_hub_models/models/whisper_small_en/model.py +++ /dev/null @@ -1,16 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
-# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from __future__ import annotations - -from qai_hub_models.models._shared.whisper.model import Whisper - -MODEL_ID = __name__.split(".")[-2] -WHISPER_VERSION = "small.en" - - -class WhisperSmallEn(Whisper): - @classmethod - def from_pretrained(cls): - return Whisper.from_pretrained(WHISPER_VERSION) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml deleted file mode 100644 index 41bef41a..00000000 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ /dev/null @@ -1,845 +0,0 @@ -aggregated: - supported_devices: - - Snapdragon 8 Elite QRD - - Samsung Galaxy S24 - - Samsung Galaxy S24 Ultra - - Samsung Galaxy S24+ - - Samsung Galaxy S23 - - Samsung Galaxy S23 Ultra - - Samsung Galaxy S23+ - - Samsung Galaxy S22 5G - - Samsung Galaxy S22 Ultra 5G - - Samsung Galaxy S22+ 5G - - Samsung Galaxy Tab S8 - - Xiaomi 12 - - Xiaomi 12 Pro - - Samsung Galaxy S21 - - Samsung Galaxy S21 Ultra - - Samsung Galaxy S21+ - - Snapdragon X Elite CRD - - Snapdragon X Plus 8-Core CRD - - SA8775P ADP - - QCS8450 (Proxy) - - XR2 Gen 2 (Proxy) - - QCS8550 (Proxy) - - SA7255P ADP - - SA8255 (Proxy) - - SA8295P ADP - - SA8650 (Proxy) - supported_chipsets: - - Snapdragon® 8 Elite - - Snapdragon® 8 Gen 3 - - Snapdragon® 8 Gen 2 - - Snapdragon® 8 Gen 1 - - Snapdragon® 888 - - Snapdragon® X Elite - - Snapdragon® X Plus 8-Core - - SA8775P - - QCS8450 Proxy - - QCS8550 Proxy - - SA7255P - - SA8255P Proxy - - SA8295P - - SA8650P Proxy -models: -- name: WhisperEncoder - universal_assets: - torchscript_onnx_tflite: mn1zy4o8m - torchscript_onnx: mqy3de97m - performance_metrics: - - torchscript_onnx_tflite: - inference_time: 722700.0 - throughput: 1.3837000138370001 - estimated_peak_memory_range: - min: 72499200 - max: 471156792 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 
- layers_on_cpu: 11 - total_layers: 911 - job_id: jg9lklewg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 820248.0 - throughput: 1.2191434785577044 - estimated_peak_memory_range: - min: 135168 - max: 218862464 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jgjvodnvg - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:19:17Z' - - torchscript_onnx_tflite: - inference_time: 518950.0 - throughput: 1.9269679159841988 - estimated_peak_memory_range: - min: 116445184 - max: 211265232 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: jgdx8xlrp - job_status: Passed - torchscript_onnx: - inference_time: 778511.0 - throughput: 1.2845033660410707 - estimated_peak_memory_range: - min: 119001088 - max: 4169811136 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 884 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 884 - job_id: jgz3823o5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:19:19Z' - - torchscript_onnx_qnn: - inference_time: 516678.0 - throughput: 1.9354414161237754 - estimated_peak_memory_range: - min: 0 - max: 949699472 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jp1472z8p - job_status: Passed - reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone - os_name: Android - manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:19:21Z' - - 
torchscript_onnx_tflite: - inference_time: 696316.0 - throughput: 1.4361295733546264 - estimated_peak_memory_range: - min: 89346048 - max: 487527888 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: j5mnonydp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 644566.0 - throughput: 1.5514315058504482 - estimated_peak_memory_range: - min: 1011712 - max: 2213960 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jp4lmnr85 - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:19:00Z' - - torchscript_onnx_tflite: - inference_time: 4426504.0 - throughput: 0.22591191604028824 - estimated_peak_memory_range: - min: 113668096 - max: 148619152 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: jprvovj0g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3210318.0 - throughput: 0.3114956213060513 - estimated_peak_memory_range: - min: 860160 - max: 8826560 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jprvo630g - job_status: Passed - reference_device_info: - name: SA7255P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA7255P - timestamp: '2024-11-26T01:19:04Z' - - torchscript_onnx_tflite: - inference_time: 705299.0 - throughput: 1.4178383919444093 - estimated_peak_memory_range: - min: 27181056 - max: 425640120 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: jpy1q108p - 
job_status: Passed - torchscript_onnx_qnn: - inference_time: 638347.0 - throughput: 1.5665460948355674 - estimated_peak_memory_range: - min: 909312 - max: 2205176 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jpy1qz38p - job_status: Passed - reference_device_info: - name: SA8255 (Proxy) - os: '13' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-11-26T01:19:06Z' - - torchscript_onnx_qnn: - inference_time: 700683.0 - throughput: 1.4271789097209437 - estimated_peak_memory_range: - min: 2977792 - max: 8937728 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: jp8q62ykp - job_status: Passed - reference_device_info: - name: SA8295P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8295P - timestamp: '2024-11-26T01:19:09Z' - - torchscript_onnx_tflite: - inference_time: 709764.0 - throughput: 1.4089190209703506 - estimated_peak_memory_range: - min: 82300928 - max: 466405832 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: j5q6z6onp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 678710.0 - throughput: 1.4733833301410029 - estimated_peak_memory_range: - min: 1564672 - max: 2939112 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: j5q6z0qnp - job_status: Passed - reference_device_info: - name: SA8650 (Proxy) - os: '13' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-11-26T01:19:11Z' - - torchscript_onnx_tflite: - inference_time: 1293650.0 - throughput: 0.7730066092065088 - estimated_peak_memory_range: 
- min: 113053696 - max: 147082896 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: j56yryl6p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 603983.0 - throughput: 1.6556757392178256 - estimated_peak_memory_range: - min: 737280 - max: 6739248 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: j56yr246p - job_status: Passed - reference_device_info: - name: SA8775P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8775P - timestamp: '2024-11-26T01:19:13Z' - - torchscript_onnx_tflite: - inference_time: 969375.0 - throughput: 1.0315925209542232 - estimated_peak_memory_range: - min: 115101696 - max: 214869520 - primary_compute_unit: GPU - precision: fp16 - layer_info: - layers_on_npu: 0 - layers_on_gpu: 900 - layers_on_cpu: 11 - total_layers: 911 - job_id: jgo2o2qqp - job_status: Passed - reference_device_info: - name: QCS8450 (Proxy) - os: '13' - form_factor: Xr - os_name: Android - manufacturer: Qualcomm - chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:19:15Z' - - torchscript_onnx_qnn: - inference_time: 504049.0 - throughput: 1.9839341016448797 - estimated_peak_memory_range: - min: 483328 - max: 483328 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 1329 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 1329 - job_id: j5mnoexdp - job_status: Passed - torchscript_onnx: - inference_time: 1342641.0 - throughput: 0.74480073228808 - estimated_peak_memory_range: - min: 248946688 - max: 248946688 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 884 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 884 - job_id: jgdx8nxrp - job_status: Passed - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - 
manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:19:23Z' -- name: WhisperDecoder - universal_assets: - torchscript_onnx_tflite: mn1zylzzm - torchscript_onnx: mn7lpzkvq - performance_metrics: - - torchscript_onnx_tflite: - inference_time: 28657.0 - throughput: 34.89548801339987 - estimated_peak_memory_range: - min: 16793600 - max: 104581352 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jp1474x8p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 11929.0 - throughput: 83.82932349735938 - estimated_peak_memory_range: - min: 63651840 - max: 148257184 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jped8omo5 - job_status: Passed - torchscript_onnx: - inference_time: 58778.0 - throughput: 17.013168192180746 - estimated_peak_memory_range: - min: 125800448 - max: 128624472 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: jped8odo5 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S23 - os: '13' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:19:18Z' - - torchscript_onnx_tflite: - inference_time: 23885.0 - throughput: 41.86728072011723 - estimated_peak_memory_range: - min: 16781312 - max: 154839984 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: j57yky3v5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 9450.0 - throughput: 105.82010582010582 - estimated_peak_memory_range: - min: 467423232 - max: 579311456 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - 
layers_on_cpu: 0 - total_layers: 2255 - job_id: jg9lk0nwg - job_status: Passed - torchscript_onnx: - inference_time: 47995.0 - throughput: 20.835503698301906 - estimated_peak_memory_range: - min: 89071616 - max: 1190590640 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: j5we8we35 - job_status: Passed - reference_device_info: - name: Samsung Galaxy S24 - os: '14' - form_factor: Phone - os_name: Android - manufacturer: Samsung - chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:19:20Z' - - torchscript_onnx_tflite: - inference_time: 16628.0 - throughput: 60.13952369497233 - estimated_peak_memory_range: - min: 16338944 - max: 275702160 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: j56y3wknp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 8060.0 - throughput: 124.06947890818859 - estimated_peak_memory_range: - min: 55312384 - max: 197216032 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jgdx8n1rp - job_status: Passed - torchscript_onnx: - inference_time: 44088.0 - throughput: 22.68190890945382 - estimated_peak_memory_range: - min: 72331264 - max: 730413376 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: jp147248p - job_status: Passed - reference_device_info: - name: Snapdragon 8 Elite QRD - os: '15' - form_factor: Phone - os_name: Android - manufacturer: Qualcomm - chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:19:22Z' - - torchscript_onnx_tflite: - inference_time: 28650.0 - throughput: 34.904013961605585 - estimated_peak_memory_range: - min: 16650240 - max: 105408024 - primary_compute_unit: NPU - precision: fp16 - layer_info: - 
layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jgn6o68k5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 11920.0 - throughput: 83.89261744966443 - estimated_peak_memory_range: - min: 63717376 - max: 74030208 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jpxk39o35 - job_status: Passed - reference_device_info: - name: QCS8550 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:19:00Z' - - torchscript_onnx_qnn: - inference_time: 74962.0 - throughput: 13.34009231343881 - estimated_peak_memory_range: - min: 59043840 - max: 67144592 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jp2k4xyrp - job_status: Passed - reference_device_info: - name: SA7255P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA7255P - timestamp: '2024-11-26T01:19:05Z' - - torchscript_onnx_tflite: - inference_time: 29533.0 - throughput: 33.86042731859276 - estimated_peak_memory_range: - min: 16695296 - max: 103494832 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jp0zdz795 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 12125.0 - throughput: 82.47422680412372 - estimated_peak_memory_range: - min: 59432960 - max: 65072384 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jp0zd4095 - job_status: Passed - reference_device_info: - name: SA8255 (Proxy) - os: '13' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8255P Proxy - timestamp: '2024-11-26T01:19:07Z' - - 
torchscript_onnx_tflite: - inference_time: 30807.0 - throughput: 32.460155159541664 - estimated_peak_memory_range: - min: 16793600 - max: 169542048 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jgkeoemwg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 14596.0 - throughput: 68.51192107426692 - estimated_peak_memory_range: - min: 59547648 - max: 65298544 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jgkeovxwg - job_status: Passed - reference_device_info: - name: SA8295P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8295P - timestamp: '2024-11-26T01:19:09Z' - - torchscript_onnx_tflite: - inference_time: 29430.0 - throughput: 33.97893306150187 - estimated_peak_memory_range: - min: 16691200 - max: 103572280 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jglvovrj5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 12052.0 - throughput: 82.97378028542981 - estimated_peak_memory_range: - min: 67915776 - max: 69332368 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jglvo4mj5 - job_status: Passed - reference_device_info: - name: SA8650 (Proxy) - os: '13' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8650P Proxy - timestamp: '2024-11-26T01:19:12Z' - - torchscript_onnx_tflite: - inference_time: 33020.0 - throughput: 30.28467595396729 - estimated_peak_memory_range: - min: 16818176 - max: 182363520 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jp3jxj23g - 
job_status: Passed - reference_device_info: - name: SA8775P ADP - os: '14' - form_factor: Auto - os_name: Android - manufacturer: Qualcomm - chipset: SA8775P - timestamp: '2024-11-26T01:19:14Z' - - torchscript_onnx_tflite: - inference_time: 34145.0 - throughput: 29.28686484111876 - estimated_peak_memory_range: - min: 16830464 - max: 145963840 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2573 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2573 - job_id: jpv6eqkk5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 15967.0 - throughput: 62.629172668629046 - estimated_peak_memory_range: - min: 59494400 - max: 181129152 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2255 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2255 - job_id: jpv6eq6k5 - job_status: Passed - reference_device_info: - name: QCS8450 (Proxy) - os: '13' - form_factor: Xr - os_name: Android - manufacturer: Qualcomm - chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:19:16Z' - - torchscript_onnx: - inference_time: 52917.0 - throughput: 18.897518755787367 - estimated_peak_memory_range: - min: 242864128 - max: 242864128 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: j5we8wem5 - job_status: Passed - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:19:24Z' diff --git a/qai_hub_models/models/whisper_small_en/requirements.txt b/qai_hub_models/models/whisper_small_en/requirements.txt deleted file mode 100644 index d414a308..00000000 --- a/qai_hub_models/models/whisper_small_en/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -openai-whisper==20231117 -audio2numpy==0.1.2 -samplerate==0.2.1 -scipy>=1.8.1,<2 # 1.8.1 is for AIMET diff --git a/qai_hub_models/models/whisper_small_en/test.py 
b/qai_hub_models/models/whisper_small_en/test.py deleted file mode 100644 index d94d4bf5..00000000 --- a/qai_hub_models/models/whisper_small_en/test.py +++ /dev/null @@ -1,22 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from qai_hub_models.models._shared.whisper.test_utils import ( - run_test_transcribe, - run_test_wrapper_numerics, -) -from qai_hub_models.models.whisper_tiny_en.demo import main as demo_main -from qai_hub_models.models.whisper_tiny_en.model import WHISPER_VERSION - - -def test_numerics(): - run_test_wrapper_numerics(WHISPER_VERSION) - - -def test_transcribe(): - run_test_transcribe(WHISPER_VERSION) - - -def test_demo(): - demo_main(is_test=True) diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index b0e3b591..2cc678f6 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -31,7 +31,7 @@ export_without_hub_access, ) -ALL_COMPONENTS = ["WhisperEncoder", "WhisperDecoder"] +ALL_COMPONENTS = ["WhisperDecoder", "WhisperEncoder"] def export_model( @@ -123,10 +123,10 @@ def export_model( # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) components_dict: dict[str, BaseModel] = {} - if "WhisperEncoder" in components: - components_dict["WhisperEncoder"] = model.encoder # type: ignore if "WhisperDecoder" in components: components_dict["WhisperDecoder"] = model.decoder # type: ignore + if "WhisperEncoder" in components: + components_dict["WhisperEncoder"] = model.encoder # type: ignore compile_jobs: dict[str, hub.client.CompileJob] = {} for component_name, component in components_dict.items(): diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index 8815c95f..787dc354 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -42,39 +42,39 @@ aggregated: - SA8295P - SA8650P Proxy models: -- name: WhisperEncoder +- name: WhisperDecoder universal_assets: - torchscript_onnx_tflite: mn7lpz2jq + torchscript_onnx_tflite: mqp3zo80m performance_metrics: - torchscript_onnx_tflite: - inference_time: 102517.0 - throughput: 9.75447974482281 + inference_time: 4050.0 + throughput: 246.91358024691357 estimated_peak_memory_range: - min: 16072704 - max: 106643648 - primary_compute_unit: GPU + min: 2981888 + max: 41533984 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jgkeoej2g + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jglvyw925 job_status: Passed torchscript_onnx_qnn: - inference_time: 140918.0 - throughput: 7.096325522644375 + inference_time: 2401.0 + throughput: 416.49312786339027 estimated_peak_memory_range: - min: 45056 - max: 54323792 + min: 16384 + max: 126682880 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - 
total_layers: 313 - job_id: jpxk3kv35 + total_layers: 447 + job_id: jgn6z14v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -83,36 +83,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:17:04Z' + timestamp: '2024-12-11T22:39:29Z' - torchscript_onnx_tflite: - inference_time: 79931.0 - throughput: 12.510790556855287 + inference_time: 3050.0 + throughput: 327.8688524590164 estimated_peak_memory_range: - min: 20946944 - max: 50992464 - primary_compute_unit: GPU + min: 12288 + max: 65282400 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jglvovj85 + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jp3jzolmg job_status: Passed torchscript_onnx_qnn: - inference_time: 110457.0 - throughput: 9.053296758014431 + inference_time: 1710.0 + throughput: 584.7953216374269 estimated_peak_memory_range: - min: 32768 - max: 199741952 + min: 4624384 + max: 51246736 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgn6o62k5 + total_layers: 447 + job_id: jp2kro1xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -121,36 +121,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:17:06Z' + timestamp: '2024-12-11T22:39:31Z' - torchscript_onnx_tflite: - inference_time: 65236.0 - throughput: 15.32895947023116 + inference_time: 2620.0 + throughput: 381.6793893129771 estimated_peak_memory_range: - min: 20885504 - max: 40414368 - primary_compute_unit: GPU + min: 8192 + max: 59037824 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jp3jxjylg + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + 
total_layers: 557 + job_id: jpv6l2yz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 89706.0 - throughput: 11.14752636389985 + inference_time: 1546.0 + throughput: 646.8305304010349 estimated_peak_memory_range: - min: 81920 - max: 203678768 + min: 0 + max: 44035568 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jp2k4k8rp + total_layers: 447 + job_id: jp0zmow25 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -159,36 +159,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:17:08Z' + timestamp: '2024-12-11T22:39:33Z' - torchscript_onnx_tflite: - inference_time: 100710.0 - throughput: 9.92950054612253 + inference_time: 4107.0 + throughput: 243.48672997321646 estimated_peak_memory_range: - min: 20803584 - max: 114031768 - primary_compute_unit: GPU + min: 2985984 + max: 43232776 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jpv6e6jj5 + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jped76085 job_status: Passed torchscript_onnx_qnn: - inference_time: 104587.0 - throughput: 9.561417767026494 + inference_time: 2368.0 + throughput: 422.2972972972973 estimated_peak_memory_range: - min: 765952 - max: 1936432 + min: 10657792 + max: 12331320 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jp0zdzy95 + total_layers: 447 + job_id: jgke261yg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -197,36 +197,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:17:10Z' + timestamp: '2024-12-11T22:39:35Z' - torchscript_onnx_tflite: - inference_time: 532945.0 - 
throughput: 1.8763662291606076 + inference_time: 18307.0 + throughput: 54.6239143497023 estimated_peak_memory_range: - min: 20922368 - max: 45653184 - primary_compute_unit: GPU + min: 2510848 + max: 59824608 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jped8dj15 + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: j5wely045 job_status: Passed torchscript_onnx_qnn: - inference_time: 466605.0 - throughput: 2.1431403435453973 + inference_time: 15533.0 + throughput: 64.37906392841047 estimated_peak_memory_range: - min: 1748992 - max: 7339872 + min: 9244672 + max: 19403424 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: j56yry66p + total_layers: 447 + job_id: jp3jzodxg job_status: Passed reference_device_info: name: SA7255P ADP @@ -235,36 +235,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:17:15Z' + timestamp: '2024-12-11T22:39:39Z' - torchscript_onnx_tflite: - inference_time: 102188.0 - throughput: 9.785884839707206 + inference_time: 4007.0 + throughput: 249.56326428749688 estimated_peak_memory_range: - min: 20779008 - max: 122386512 - primary_compute_unit: GPU + min: 2977792 + max: 42153640 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: j5we8eo65 + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jp14noknp job_status: Passed torchscript_onnx_qnn: - inference_time: 108187.0 - throughput: 9.243254734857238 + inference_time: 2339.0 + throughput: 427.53313381787086 estimated_peak_memory_range: - min: 651264 - max: 2085872 + min: 2928640 + max: 4209024 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 
447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgo2o2yqp + total_layers: 447 + job_id: jpv6l2875 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -273,36 +273,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:17:16Z' + timestamp: '2024-12-11T22:39:41Z' - torchscript_onnx_tflite: - inference_time: 107711.0 - throughput: 9.284102830722953 + inference_time: 5188.0 + throughput: 192.75250578257518 estimated_peak_memory_range: - min: 20963328 - max: 42654624 - primary_compute_unit: GPU + min: 2973696 + max: 57399664 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jp147402p + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: j5wely0z5 job_status: Passed torchscript_onnx_qnn: - inference_time: 128097.0 - throughput: 7.8065840730071745 + inference_time: 3586.0 + throughput: 278.8622420524261 estimated_peak_memory_range: - min: 692224 - max: 6419600 + min: 1146880 + max: 7463440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgjvovmvg + total_layers: 447 + job_id: jped76q75 job_status: Passed reference_device_info: name: SA8295P ADP @@ -311,36 +311,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:17:18Z' + timestamp: '2024-12-11T22:39:43Z' - torchscript_onnx_tflite: - inference_time: 99594.0 - throughput: 10.040765507962327 + inference_time: 4064.0 + throughput: 246.06299212598427 estimated_peak_memory_range: - min: 21336064 - max: 60906688 - primary_compute_unit: GPU + min: 3006464 + max: 42566672 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: j5we8eo35 + layers_on_npu: 557 + layers_on_gpu: 0 
+ layers_on_cpu: 0 + total_layers: 557 + job_id: jp14nokkp job_status: Passed torchscript_onnx_qnn: - inference_time: 107074.0 - throughput: 9.33933541289202 + inference_time: 2422.0 + throughput: 412.8819157720892 estimated_peak_memory_range: - min: 737280 - max: 1981624 + min: 10665984 + max: 11886496 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgz3839o5 + total_layers: 447 + job_id: j5welykz5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -349,36 +349,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:17:20Z' + timestamp: '2024-12-11T22:39:45Z' - torchscript_onnx_tflite: - inference_time: 178344.0 - throughput: 5.607141255102499 + inference_time: 5443.0 + throughput: 183.72221201543266 estimated_peak_memory_range: - min: 20922368 - max: 45040784 - primary_compute_unit: GPU + min: 0 + max: 57652768 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: jp147408p + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: j57yeo1q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 119681.0 - throughput: 8.355545157543803 + inference_time: 3465.0 + throughput: 288.6002886002886 estimated_peak_memory_range: - min: 499712 - max: 6200256 + min: 9273344 + max: 15403904 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jg9lkl1wg + total_layers: 447 + job_id: jp14no9kp job_status: Passed reference_device_info: name: SA8775P ADP @@ -387,36 +387,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:17:22Z' + timestamp: '2024-12-11T22:39:47Z' - torchscript_onnx_tflite: - inference_time: 156253.0 - throughput: 
6.399877122359251 + inference_time: 4664.0 + throughput: 214.4082332761578 estimated_peak_memory_range: - min: 21123072 - max: 57526432 - primary_compute_unit: GPU + min: 2973696 + max: 64845504 + primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 0 - layers_on_gpu: 260 - layers_on_cpu: 11 - total_layers: 271 - job_id: j57ykyzv5 + layers_on_npu: 557 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 557 + job_id: jpxkl08j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 158505.0 - throughput: 6.308949244503328 + inference_time: 2892.0 + throughput: 345.78146611341634 estimated_peak_memory_range: - min: 126976 - max: 203600672 + min: 4628480 + max: 52028000 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgdx8x9rp + total_layers: 447 + job_id: j57yeomq5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -425,21 +425,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:17:24Z' + timestamp: '2024-12-11T22:39:49Z' - torchscript_onnx_qnn: - inference_time: 97524.0 - throughput: 10.25388622287847 + inference_time: 2285.0 + throughput: 437.636761487965 estimated_peak_memory_range: - min: 503808 - max: 503808 + min: 10629120 + max: 10629120 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 313 + layers_on_npu: 447 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 313 - job_id: jgkeoezwg + total_layers: 447 + job_id: jglvywde5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -448,40 +448,40 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:17:12Z' -- name: WhisperDecoder + timestamp: '2024-12-11T22:39:37Z' +- name: WhisperEncoder universal_assets: - torchscript_onnx_tflite: mq8k1o4gq + torchscript_onnx_tflite: mn1wz588m performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 3993.0 - throughput: 250.4382669671926 + inference_time: 99892.0 + throughput: 10.01081167661074 estimated_peak_memory_range: - min: 2977792 - max: 52372816 - primary_compute_unit: NPU + min: 20791296 + max: 53333736 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: j5q6z6j4p + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: j56y8o9np job_status: Passed torchscript_onnx_qnn: - inference_time: 2409.0 - throughput: 415.11000415110004 + inference_time: 139106.0 + throughput: 7.188762526418702 estimated_peak_memory_range: min: 16384 - max: 84277384 + max: 57206112 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: j5mnonrdp + total_layers: 313 + job_id: jprvlxrvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -490,36 +490,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:17:05Z' + timestamp: '2024-12-11T22:39:30Z' - torchscript_onnx_tflite: - inference_time: 3077.0 - throughput: 324.99187520311995 + inference_time: 81683.0 + throughput: 12.242449469289815 estimated_peak_memory_range: - min: 12288 - max: 61279824 - primary_compute_unit: NPU + min: 17825792 + max: 48857472 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: j56yryk0p + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jgo2ld71p job_status: Passed torchscript_onnx_qnn: - inference_time: 1726.0 - throughput: 579.3742757821552 + inference_time: 109268.0 + throughput: 9.15181022806311 estimated_peak_memory_range: - min: 0 - max: 43791184 + min: 28672 + max: 200232256 primary_compute_unit: NPU precision: fp16 
layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jprvovk0g + total_layers: 313 + job_id: jpy1o8lrp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -528,36 +528,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:17:06Z' + timestamp: '2024-12-11T22:39:32Z' - torchscript_onnx_tflite: - inference_time: 2624.0 - throughput: 381.0975609756098 + inference_time: 74784.0 + throughput: 13.371844244758236 estimated_peak_memory_range: - min: 2228224 - max: 59923664 - primary_compute_unit: NPU + min: 25718784 + max: 46244496 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: jgo2o2jxp + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jgjvr361g job_status: Passed torchscript_onnx_qnn: - inference_time: 1421.0 - throughput: 703.7297677691766 + inference_time: 98550.0 + throughput: 10.147133434804667 estimated_peak_memory_range: - min: 10448896 - max: 52477872 + min: 0 + max: 204368240 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jpy1q1e8p + total_layers: 313 + job_id: jp8qejnzp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -566,36 +566,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:17:09Z' + timestamp: '2024-12-11T22:39:34Z' - torchscript_onnx_tflite: - inference_time: 4056.0 - throughput: 246.5483234714004 + inference_time: 96819.0 + throughput: 10.328551214121195 estimated_peak_memory_range: - min: 2977792 - max: 42915608 - primary_compute_unit: NPU + min: 13688832 + max: 57403944 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - 
total_layers: 557 - job_id: jgjvovjxg + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jgz3lzq45 job_status: Passed torchscript_onnx_qnn: - inference_time: 2377.0 - throughput: 420.69835927639883 + inference_time: 104143.0 + throughput: 9.60218161566308 estimated_peak_memory_range: - min: 4829184 - max: 6830608 + min: 81920 + max: 5338944 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jp8q6qokp + total_layers: 313 + job_id: j5q6l4n7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -604,36 +604,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:17:11Z' + timestamp: '2024-12-11T22:39:36Z' - torchscript_onnx_tflite: - inference_time: 18142.0 - throughput: 55.120714364458166 + inference_time: 507645.0 + throughput: 1.9698805267460529 estimated_peak_memory_range: - min: 3076096 - max: 58890384 - primary_compute_unit: NPU + min: 20955136 + max: 47231136 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: jgz3831k5 + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jg9lzo7mg job_status: Passed torchscript_onnx_qnn: - inference_time: 15519.0 - throughput: 64.43714156840002 + inference_time: 464481.0 + throughput: 2.152940593910192 estimated_peak_memory_range: - min: 9240576 - max: 15255952 + min: 565248 + max: 10914720 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jp3jxjk3g + total_layers: 313 + job_id: jgo2ldx4p job_status: Passed reference_device_info: name: SA7255P ADP @@ -642,36 +642,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:17:15Z' + timestamp: 
'2024-12-11T22:39:40Z' - torchscript_onnx_tflite: - inference_time: 3952.0 - throughput: 253.03643724696357 + inference_time: 100562.0 + throughput: 9.944114078876712 estimated_peak_memory_range: - min: 2117632 - max: 42486792 - primary_compute_unit: NPU + min: 18948096 + max: 153931088 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: jg9lklvlg + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jgdxd6y6p job_status: Passed torchscript_onnx_qnn: - inference_time: 2333.0 - throughput: 428.6326618088298 + inference_time: 106657.0 + throughput: 9.375849686377828 estimated_peak_memory_range: - min: 2105344 - max: 4240464 + min: 196608 + max: 5069992 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jpv6e6ok5 + total_layers: 313 + job_id: jgjvr397g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -680,36 +680,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:17:17Z' + timestamp: '2024-12-11T22:39:41Z' - torchscript_onnx_tflite: - inference_time: 4786.0 - throughput: 208.94274968658587 + inference_time: 103764.0 + throughput: 9.637253768166223 estimated_peak_memory_range: - min: 2973696 - max: 30581968 - primary_compute_unit: NPU + min: 21757952 + max: 43855344 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: j5wed40m5 + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jg9lzo7qg job_status: Passed torchscript_onnx_qnn: - inference_time: 3598.0 - throughput: 277.9321845469705 + inference_time: 127685.0 + throughput: 7.831773505110232 estimated_peak_memory_range: - min: 1425408 - max: 7179456 + min: 4345856 + max: 10730352 
primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jped8d1o5 + total_layers: 313 + job_id: jgz3lz6z5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -718,36 +718,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:17:19Z' + timestamp: '2024-12-11T22:39:44Z' - torchscript_onnx_tflite: - inference_time: 3997.0 - throughput: 250.1876407305479 + inference_time: 109307.0 + throughput: 9.14854492392985 estimated_peak_memory_range: - min: 2977792 - max: 52425984 - primary_compute_unit: NPU + min: 20779008 + max: 62607576 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: jg9lklvwg + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jgdxd6ykp job_status: Passed torchscript_onnx_qnn: - inference_time: 2440.0 - throughput: 409.8360655737705 + inference_time: 107644.0 + throughput: 9.289881461112556 estimated_peak_memory_range: - min: 8867840 - max: 10777976 + min: 327680 + max: 5598304 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: j5we8ev35 + total_layers: 313 + job_id: jg9lzorqg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -756,36 +756,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:17:21Z' + timestamp: '2024-12-11T22:39:46Z' - torchscript_onnx_tflite: - inference_time: 5308.0 - throughput: 188.39487565938205 + inference_time: 177953.0 + throughput: 5.619461318438015 estimated_peak_memory_range: - min: 3006464 - max: 59540816 - primary_compute_unit: NPU + min: 21008384 + max: 49143920 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - 
layers_on_cpu: 0 - total_layers: 557 - job_id: jgdx8xwrp + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: jp4lye6q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3448.0 - throughput: 290.0232018561485 + inference_time: 119437.0 + throughput: 8.372614851344224 estimated_peak_memory_range: - min: 9273344 - max: 15006224 + min: 430080 + max: 6523520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: jp1474l8p + total_layers: 313 + job_id: jgdxd6kkp job_status: Passed reference_device_info: name: SA8775P ADP @@ -794,36 +794,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:17:23Z' + timestamp: '2024-12-11T22:39:47Z' - torchscript_onnx_tflite: - inference_time: 4622.0 - throughput: 216.3565556036348 + inference_time: 185481.0 + throughput: 5.39138779713286 estimated_peak_memory_range: - min: 2998272 - max: 61497392 - primary_compute_unit: NPU + min: 11665408 + max: 49920352 + primary_compute_unit: GPU precision: fp16 layer_info: - layers_on_npu: 557 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 557 - job_id: jp4lmlq85 + layers_on_npu: 0 + layers_on_gpu: 260 + layers_on_cpu: 11 + total_layers: 271 + job_id: j5mn091yp job_status: Passed torchscript_onnx_qnn: - inference_time: 2923.0 - throughput: 342.11426616489905 + inference_time: 165323.0 + throughput: 6.0487651445957304 estimated_peak_memory_range: - min: 9232384 - max: 53999824 + min: 12288 + max: 205278816 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: j57ykywv5 + total_layers: 313 + job_id: jp4lye7q5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -832,21 +832,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: 
'2024-11-26T01:17:25Z' + timestamp: '2024-12-11T22:39:49Z' - torchscript_onnx_qnn: - inference_time: 2290.0 - throughput: 436.68122270742356 + inference_time: 97277.0 + throughput: 10.279922283787535 estimated_peak_memory_range: - min: 10625024 - max: 10625024 + min: 491520 + max: 491520 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 447 + layers_on_npu: 313 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 447 - job_id: j5q6z68np + total_layers: 313 + job_id: j56y8oxvp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -855,4 +855,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:17:13Z' + timestamp: '2024-12-11T22:39:38Z' diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 938ad3a7..4a2cf9c2 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: WideResNet50 universal_assets: - torchscript_onnx_tflite: mq8k1o5gq - torchscript_onnx: mm5d9l86n + torchscript_onnx_tflite: mnl6vokjn + torchscript_onnx: mm5ed2w2m performance_metrics: - torchscript_onnx_tflite: - inference_time: 4884.0 - throughput: 204.75020475020474 + inference_time: 4862.0 + throughput: 205.67667626491155 estimated_peak_memory_range: - min: 16384 - max: 615228528 + min: 0 + max: 594396216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jgn6o64r5 + job_id: jp2krod6p job_status: Passed torchscript_onnx_qnn: - inference_time: 5693.0 - throughput: 175.65431231336729 + inference_time: 5679.0 + throughput: 176.08733932030287 estimated_peak_memory_range: min: 622592 - max: 384067904 + max: 394330368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp3jxjdlg 
+ job_id: jpv6l2nz5 job_status: Passed torchscript_onnx: - inference_time: 5227.0 - throughput: 191.3143294432753 + inference_time: 5208.0 + throughput: 192.01228878648234 estimated_peak_memory_range: - min: 16384 - max: 169400232 + min: 12288 + max: 1966999440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jp4lmlzv5 + job_id: j5mn09l7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:16:19Z' + timestamp: '2024-12-11T22:38:42Z' - torchscript_onnx_tflite: - inference_time: 3660.0 - throughput: 273.224043715847 + inference_time: 3606.0 + throughput: 277.31558513588465 estimated_peak_memory_range: min: 16384 - max: 30620608 + max: 28517312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jprvovr9g + job_id: jpy1o820p job_status: Passed torchscript_onnx_qnn: - inference_time: 4152.0 - throughput: 240.84778420038535 + inference_time: 4145.0 + throughput: 241.25452352231605 estimated_peak_memory_range: min: 618496 - max: 30918000 + max: 33812544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgo2o2xxp + job_id: jgjvr381g job_status: Passed torchscript_onnx: - inference_time: 3763.0 - throughput: 265.7454158915759 + inference_time: 3750.0 + throughput: 266.6666666666667 estimated_peak_memory_range: min: 0 - max: 113938432 + max: 116102464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jpxk3kw15 + job_id: jgn6z1wj5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:16:20Z' + timestamp: '2024-12-11T22:38:43Z' - torchscript_onnx_tflite: - inference_time: 3410.0 - throughput: 293.2551319648094 + inference_time: 3413.0 + throughput: 292.99736302373276 estimated_peak_memory_range: min: 12288 - max: 33795312 + max: 29315584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5mnw8k9p + job_id: jp0zmox05 job_status: Passed torchscript_onnx_qnn: - inference_time: 4012.0 - throughput: 249.25224327018944 + inference_time: 3978.0 + throughput: 251.38260432378078 estimated_peak_memory_range: - min: 614400 - max: 27973728 + min: 0 + max: 29155824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jpv6e68j5 + job_id: jped76n85 job_status: Passed torchscript_onnx: - inference_time: 3552.0 - throughput: 281.5315315315315 + inference_time: 3566.0 + throughput: 280.42624789680315 estimated_peak_memory_range: min: 0 - max: 40732944 + max: 42388336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j5mnonjwp + job_id: jprvlx7kg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:16:21Z' + timestamp: '2024-12-11T22:38:44Z' - torchscript_onnx_tflite: - inference_time: 4849.0 - throughput: 206.22808826562178 + inference_time: 4873.0 + throughput: 205.21239482864766 estimated_peak_memory_range: - min: 20480 - max: 615159016 + min: 24576 + max: 531500496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jpy1q1l7p + job_id: jp8qejkqp job_status: Passed torchscript_onnx_qnn: - 
inference_time: 5029.0 - throughput: 198.8466892026248 + inference_time: 5055.0 + throughput: 197.82393669634027 estimated_peak_memory_range: - min: 626688 - max: 1879552 + min: 634880 + max: 1917152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgjvov9xg + job_id: jgz3lz045 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:16:10Z' + timestamp: '2024-12-11T22:38:33Z' - torchscript_onnx_tflite: - inference_time: 180896.0 - throughput: 5.528038209800106 + inference_time: 180919.0 + throughput: 5.52733543740569 estimated_peak_memory_range: - min: 114688 - max: 26780368 + min: 94208 + max: 28316416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp0zdzw65 + job_id: jgke26kvg job_status: Passed torchscript_onnx_qnn: - inference_time: 184744.0 - throughput: 5.412895682674404 + inference_time: 184791.0 + throughput: 5.411518959256674 estimated_peak_memory_range: - min: 724992 - max: 6310288 + min: 675840 + max: 10897504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgz3836k5 + job_id: jg9lzoqmg job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:16:12Z' + timestamp: '2024-12-11T22:38:35Z' - torchscript_onnx_tflite: - inference_time: 4881.0 - throughput: 204.8760499897562 + inference_time: 4877.0 + throughput: 205.0440844781628 estimated_peak_memory_range: min: 20480 - max: 594137184 + max: 531404344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 
- job_id: jp8q6qnxp + job_id: j5q6l4dep job_status: Passed torchscript_onnx_qnn: - inference_time: 5039.0 - throughput: 198.45207382417146 + inference_time: 5048.0 + throughput: 198.09825673534073 estimated_peak_memory_range: min: 634880 - max: 1953096 + max: 1837952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j5we8ej65 + job_id: jp14nomnp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:16:14Z' + timestamp: '2024-12-11T22:38:36Z' - torchscript_onnx_tflite: - inference_time: 8247.0 - throughput: 121.25621438098703 + inference_time: 8206.0 + throughput: 121.86205215695833 estimated_peak_memory_range: min: 20480 - max: 23413664 + max: 22528656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jp0z169n5 + job_id: jglvywq25 job_status: Passed torchscript_onnx_qnn: - inference_time: 8059.0 - throughput: 124.08487405385283 + inference_time: 8030.0 + throughput: 124.53300124533001 estimated_peak_memory_range: - min: 659456 - max: 6431152 + min: 647168 + max: 6779808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jg9lkl6lg + job_id: jgdxd6m6p job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:16:15Z' + timestamp: '2024-12-11T22:38:37Z' - torchscript_onnx_tflite: - inference_time: 4868.0 - throughput: 205.42317173377157 + inference_time: 4877.0 + throughput: 205.0440844781628 estimated_peak_memory_range: - min: 28672 - max: 594556888 + min: 24576 + max: 542257064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 
+371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j5q6z6n4p + job_id: j56y8o0np job_status: Passed torchscript_onnx_qnn: - inference_time: 5040.0 - throughput: 198.4126984126984 + inference_time: 5113.0 + throughput: 195.57989438685703 estimated_peak_memory_range: - min: 671744 - max: 2182840 + min: 630784 + max: 1772752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jp1474r2p + job_id: j57yeo8n5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:16:16Z' + timestamp: '2024-12-11T22:38:39Z' - torchscript_onnx_tflite: - inference_time: 8492.0 - throughput: 117.75788977861517 + inference_time: 8494.0 + throughput: 117.73016246762421 estimated_peak_memory_range: min: 20480 - max: 26717008 + max: 27770544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jglvovd85 + job_id: jp3jzormg job_status: Passed torchscript_onnx_qnn: - inference_time: 8662.0 - throughput: 115.44677903486493 + inference_time: 8708.0 + throughput: 114.83693155718879 estimated_peak_memory_range: - min: 393216 - max: 6134240 + min: 618496 + max: 6448448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jgdx8xjep + job_id: jp4lye225 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:16:17Z' + timestamp: '2024-12-11T22:38:40Z' - torchscript_onnx_tflite: - inference_time: 7097.0 - throughput: 140.90460758066789 + inference_time: 7132.0 + throughput: 140.21312394840157 estimated_peak_memory_range: min: 16384 - max: 26557552 + max: 28807744 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j56yryx0p + job_id: jgo2ld91p job_status: Passed torchscript_onnx_qnn: - inference_time: 7293.0 - throughput: 137.1177841766077 + inference_time: 7240.0 + throughput: 138.12154696132598 estimated_peak_memory_range: - min: 618496 - max: 22991520 + min: 647168 + max: 27822960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j57ykyql5 + job_id: jpxkl0z85 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:16:18Z' + timestamp: '2024-12-11T22:38:41Z' - torchscript_onnx_qnn: - inference_time: 4899.0 - throughput: 204.12329046744233 + inference_time: 4913.0 + throughput: 203.5416242621616 estimated_peak_memory_range: min: 602112 max: 602112 @@ -485,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jped8dq15 + job_id: j5welyr45 job_status: Passed torchscript_onnx: - inference_time: 4632.0 - throughput: 215.88946459412782 + inference_time: 4650.0 + throughput: 215.05376344086022 estimated_peak_memory_range: - min: 139456512 - max: 139456512 + min: 140496896 + max: 140496896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -500,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jgn6o6jr5 + job_id: jp2kroz6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -509,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:16:22Z' + timestamp: '2024-12-11T22:38:45Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/evaluate.py b/qai_hub_models/models/wideresnet50_quantized/evaluate.py index 865f4679..1ee84dc4 100644 --- 
a/qai_hub_models/models/wideresnet50_quantized/evaluate.py +++ b/qai_hub_models/models/wideresnet50_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index 7102351f..4db802ef 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -237,7 +237,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/wideresnet50_quantized/info.yaml b/qai_hub_models/models/wideresnet50_quantized/info.yaml index 8eb99295..8a2f48f0 100644 --- a/qai_hub_models/models/wideresnet50_quantized/info.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Image Classification tags: - backbone - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/1605.07146 research_paper_title: Wide Residual Networks license: https://github.com/pytorch/vision/blob/main/LICENSE diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index 919f1e2d..10cf0fa7 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -50,14 +50,14 @@ aggregated: models: - name: WideResNet50-Quantized universal_assets: - torchscript_onnx_tflite: mqpzyd1vn + torchscript_onnx_tflite: mqyv3kvrq performance_metrics: - torchscript_onnx_tflite: - inference_time: 1789.0 - throughput: 558.9714924538848 + inference_time: 1781.0 + throughput: 
561.4823133071309 estimated_peak_memory_range: - min: 16384 - max: 135758688 + min: 20480 + max: 146572616 primary_compute_unit: NPU precision: int8 layer_info: @@ -65,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jped8r8v5 + job_id: jp8qeonop job_status: Passed torchscript_onnx_qnn: - inference_time: 2044.0 - throughput: 489.23679060665364 + inference_time: 2030.0 + throughput: 492.61083743842363 estimated_peak_memory_range: - min: 16384 - max: 134544264 + min: 0 + max: 467652912 primary_compute_unit: NPU precision: int8 layer_info: @@ -80,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpxk37385 + job_id: jp14n0r7p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -89,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:40:14Z' + timestamp: '2024-12-12T01:08:48Z' - torchscript_onnx_tflite: - inference_time: 1349.0 - throughput: 741.2898443291327 + inference_time: 1345.0 + throughput: 743.4944237918215 estimated_peak_memory_range: min: 16384 - max: 23710432 + max: 28717920 primary_compute_unit: NPU precision: int8 layer_info: @@ -103,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgz38x8x5 + job_id: jgke2z1ng job_status: Passed torchscript_onnx_qnn: - inference_time: 1531.0 - throughput: 653.1678641410842 + inference_time: 1527.0 + throughput: 654.8788474132285 estimated_peak_memory_range: - min: 0 - max: 23365616 + min: 167936 + max: 24911136 primary_compute_unit: NPU precision: int8 layer_info: @@ -118,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5mnowo7p + job_id: jgdxdwjzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -127,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:40:15Z' + timestamp: '2024-12-12T01:08:50Z' - torchscript_onnx_tflite: - 
inference_time: 1180.0 - throughput: 847.457627118644 + inference_time: 1249.0 + throughput: 800.640512409928 estimated_peak_memory_range: min: 12288 - max: 17867920 + max: 21834848 primary_compute_unit: NPU precision: int8 layer_info: @@ -141,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j5we8d8m5 + job_id: j5q6l8nop job_status: Passed torchscript_onnx_qnn: - inference_time: 1448.0 - throughput: 690.6077348066299 + inference_time: 1454.0 + throughput: 687.757909215956 estimated_peak_memory_range: min: 0 - max: 19077200 + max: 22037744 primary_compute_unit: NPU precision: int8 layer_info: @@ -156,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgn6o9oj5 + job_id: j5weloj45 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -165,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:40:17Z' + timestamp: '2024-12-12T01:08:52Z' - torchscript_onnx_tflite: - inference_time: 7945.0 - throughput: 125.86532410320956 + inference_time: 8337.0 + throughput: 119.94722322178242 estimated_peak_memory_range: - min: 12288 - max: 26875536 + min: 36864 + max: 28743472 primary_compute_unit: NPU precision: int8 layer_info: @@ -179,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jg9lk3k8g + job_id: jglvynjm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 9760.0 - throughput: 102.45901639344262 + inference_time: 9319.0 + throughput: 107.30765103551883 estimated_peak_memory_range: - min: 200704 - max: 8698304 + min: 163840 + max: 7575200 primary_compute_unit: NPU precision: int8 layer_info: @@ -194,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jprvo4okg + job_id: jg9lzv6mg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -203,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: 
'2024-11-26T03:39:57Z' + timestamp: '2024-12-12T01:08:54Z' - torchscript_onnx_tflite: - inference_time: 23919.0 - throughput: 41.80776788327271 + inference_time: 23686.0 + throughput: 42.21903233977877 estimated_peak_memory_range: min: 40960 - max: 4365256 + max: 2462808 primary_compute_unit: NPU precision: int8 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp147d77p + job_id: j56y86kyp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -226,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:39:36Z' + timestamp: '2024-12-12T01:08:34Z' - torchscript_onnx_tflite: - inference_time: 1852.0 - throughput: 539.9568034557235 + inference_time: 1778.0 + throughput: 562.429696287964 estimated_peak_memory_range: - min: 20480 - max: 6224064 + min: 40960 + max: 136169632 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdx8r8zp + job_id: jp3jzkyng job_status: Passed torchscript_onnx_qnn: - inference_time: 1918.0 - throughput: 521.376433785193 + inference_time: 1921.0 + throughput: 520.5622071837585 estimated_peak_memory_range: - min: 176128 - max: 1862992 + min: 192512 + max: 1426576 primary_compute_unit: NPU precision: int8 layer_info: @@ -255,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpy1q4q0p + job_id: jp14n0rnp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -264,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:39:59Z' + timestamp: '2024-12-12T01:08:56Z' - torchscript_onnx_tflite: - inference_time: 22349.0 - throughput: 44.74473130788849 + inference_time: 22321.0 + throughput: 44.800860176515386 estimated_peak_memory_range: - min: 32768 - max: 17878720 + min: 53248 + max: 19412112 primary_compute_unit: NPU precision: int8 
layer_info: @@ -278,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j5we8d845 + job_id: jgo2lyjkp job_status: Passed torchscript_onnx_qnn: - inference_time: 23757.0 - throughput: 42.09285684219388 + inference_time: 23753.0 + throughput: 42.099945270071146 estimated_peak_memory_range: - min: 126976 - max: 5789552 + min: 122880 + max: 10955904 primary_compute_unit: NPU precision: int8 layer_info: @@ -293,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j5q6z7mep + job_id: j57yezqn5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -302,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:40:02Z' + timestamp: '2024-12-12T01:08:59Z' - torchscript_onnx_tflite: - inference_time: 1787.0 - throughput: 559.5970900951315 + inference_time: 1783.0 + throughput: 560.8524957936063 estimated_peak_memory_range: min: 16384 - max: 146229248 + max: 8724760 primary_compute_unit: NPU precision: int8 layer_info: @@ -316,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jg9lk3kmg + job_id: jgjvrxjeg job_status: Passed torchscript_onnx_qnn: - inference_time: 1921.0 - throughput: 520.5622071837585 + inference_time: 1926.0 + throughput: 519.2107995846313 estimated_peak_memory_range: min: 184320 - max: 1283912 + max: 1758488 primary_compute_unit: NPU precision: int8 layer_info: @@ -331,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jglvo0125 + job_id: jp4lyqz25 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -340,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:40:04Z' + timestamp: '2024-12-12T01:09:01Z' - torchscript_onnx_tflite: - inference_time: 2725.0 - throughput: 366.9724770642202 + inference_time: 2761.0 + throughput: 362.18761318362914 estimated_peak_memory_range: min: 16384 - max: 18402688 + max: 
20187856 primary_compute_unit: NPU precision: int8 layer_info: @@ -354,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp147d7np + job_id: jped79jv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2886.0 - throughput: 346.5003465003465 + inference_time: 3074.0 + throughput: 325.30904359141186 estimated_peak_memory_range: min: 0 - max: 6034624 + max: 6001184 primary_compute_unit: NPU precision: int8 layer_info: @@ -369,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j56yr3dnp + job_id: jpxklvw85 job_status: Passed reference_device_info: name: SA8295P ADP @@ -378,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:40:06Z' + timestamp: '2024-12-12T01:09:03Z' - torchscript_onnx_tflite: inference_time: 1782.0 throughput: 561.1672278338945 estimated_peak_memory_range: - min: 16384 - max: 135339152 + min: 20480 + max: 136003200 primary_compute_unit: NPU precision: int8 layer_info: @@ -392,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jgdx8r86p + job_id: jgz3le1x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1923.0 - throughput: 520.0208008320333 + inference_time: 1921.0 + throughput: 520.5622071837585 estimated_peak_memory_range: - min: 196608 - max: 1584432 + min: 200704 + max: 1840400 primary_compute_unit: NPU precision: int8 layer_info: @@ -407,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp3jx4wmg + job_id: j5mn0rj7p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -416,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:40:08Z' + timestamp: '2024-12-12T01:09:05Z' - torchscript_onnx_tflite: - inference_time: 2075.0 - throughput: 481.9277108433735 + inference_time: 2072.0 + throughput: 482.6254826254826 estimated_peak_memory_range: - min: 16384 - max: 18601952 + min: 20480 
+ max: 20837440 primary_compute_unit: NPU precision: int8 layer_info: @@ -430,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: j57ykjkn5 + job_id: j5welojm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2457.0 - throughput: 407.000407000407 + inference_time: 2419.0 + throughput: 413.39396444811905 estimated_peak_memory_range: - min: 167936 - max: 5901376 + min: 0 + max: 5980336 primary_compute_unit: NPU precision: int8 layer_info: @@ -445,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jgo2o141p + job_id: jgn6z2jj5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -454,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:40:10Z' + timestamp: '2024-12-12T01:09:07Z' - torchscript_onnx_tflite: - inference_time: 2188.0 - throughput: 457.0383912248629 + inference_time: 2182.0 + throughput: 458.29514207149407 estimated_peak_memory_range: - min: 20480 - max: 23736384 + min: 16384 + max: 29349632 primary_compute_unit: NPU precision: int8 layer_info: @@ -468,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 82 - job_id: jp4lmxm25 + job_id: jg9lzv68g job_status: Passed torchscript_onnx_qnn: - inference_time: 2525.0 - throughput: 396.03960396039605 + inference_time: 2475.0 + throughput: 404.04040404040404 estimated_peak_memory_range: - min: 167936 - max: 23589872 + min: 122880 + max: 26873712 primary_compute_unit: NPU precision: int8 layer_info: @@ -483,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jpv6e19z5 + job_id: jprvlkzkg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -492,13 +492,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:40:12Z' + timestamp: '2024-12-12T01:09:09Z' - torchscript_onnx_qnn: - inference_time: 2175.0 - throughput: 459.7701149425287 + inference_time: 1845.0 + throughput: 
542.0054200542005 estimated_peak_memory_range: - min: 229376 - max: 229376 + min: 286720 + max: 286720 primary_compute_unit: NPU precision: int8 layer_info: @@ -506,7 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jp0zd1v05 + job_id: jgdxdwj6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -515,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:40:19Z' + timestamp: '2024-12-12T01:08:57Z' diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 3549390b..4f1728b0 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: XLSR universal_assets: - torchscript_onnx_tflite: mmx71vorq - torchscript_onnx: mqy3degvm + torchscript_onnx_tflite: mno63o7gn + torchscript_onnx: mnz1vjzoq performance_metrics: - torchscript_onnx_tflite: - inference_time: 2542.0 - throughput: 393.3910306845004 + inference_time: 2585.0 + throughput: 386.84719535783364 estimated_peak_memory_range: - min: 28672 - max: 2907856 + min: 16384 + max: 36377608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jp1474k2p + job_id: j57yeoon5 job_status: Passed torchscript_onnx_qnn: - inference_time: 965.0 - throughput: 1036.2694300518135 + inference_time: 966.0 + throughput: 1035.1966873706003 estimated_peak_memory_range: min: 217088 - max: 15165760 + max: 2839256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp0zdz865 + job_id: jgke260vg job_status: Passed torchscript_onnx: - inference_time: 1458.0 - throughput: 685.8710562414266 + inference_time: 1511.0 + throughput: 661.8133686300463 estimated_peak_memory_range: - min: 212992 - max: 1975488 + min: 45056 + max: 20891680 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jgz383qk5 + job_id: jg9lzo2mg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:15:31Z' + timestamp: '2024-12-11T22:37:54Z' - torchscript_onnx_tflite: - inference_time: 1743.0 - throughput: 573.7234652897304 + inference_time: 1684.0 + throughput: 593.8242280285035 estimated_peak_memory_range: min: 20480 - max: 12488752 + max: 13956256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jgdx8xyep + job_id: jp4lyee25 job_status: Passed torchscript_onnx_qnn: - inference_time: 623.0 - throughput: 1605.1364365971108 + inference_time: 620.0 + throughput: 1612.9032258064517 estimated_peak_memory_range: - min: 208896 - max: 9948384 + min: 0 + max: 10570576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp8q6qdxp + job_id: j5q6l41ep job_status: Passed torchscript_onnx: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1091.0 + throughput: 916.5902841429881 estimated_peak_memory_range: min: 0 - max: 24166720 + max: 25195520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: j5we8ek65 + job_id: jp14no1np job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:15:32Z' + timestamp: '2024-12-11T22:37:55Z' - torchscript_onnx_tflite: - inference_time: 1961.0 - throughput: 509.94390617032127 + inference_time: 2047.0 + throughput: 488.5197850512946 
estimated_peak_memory_range: min: 12288 - max: 8893488 + max: 9660432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j57yky1l5 + job_id: jpxkl0085 job_status: Passed torchscript_onnx_qnn: - inference_time: 665.0 - throughput: 1503.7593984962407 + inference_time: 747.0 + throughput: 1338.6880856760374 estimated_peak_memory_range: min: 0 - max: 9385840 + max: 9064528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgkeoew2g + job_id: jglvyw825 job_status: Passed torchscript_onnx: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1068.0 + throughput: 936.3295880149813 estimated_peak_memory_range: min: 0 - max: 15591360 + max: 15817840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jg9lklrlg + job_id: jgdxd646p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:15:33Z' + timestamp: '2024-12-11T22:37:56Z' - torchscript_onnx_tflite: - inference_time: 2447.0 - throughput: 408.6636697997548 + inference_time: 2473.0 + throughput: 404.36716538617065 estimated_peak_memory_range: min: 16384 - max: 2744944 + max: 81628952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jp4lml6v5 + job_id: j5mn0997p job_status: Passed torchscript_onnx_qnn: - inference_time: 875.0 - throughput: 1142.857142857143 + inference_time: 880.0 + throughput: 1136.3636363636363 estimated_peak_memory_range: - min: 229376 - max: 1545216 + min: 221184 + max: 1541288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5q6z6x4p + job_id: j56y8omnp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:15:22Z' + timestamp: '2024-12-11T22:37:45Z' - torchscript_onnx_tflite: - inference_time: 17003.0 - throughput: 58.81315062047874 + inference_time: 16079.0 + throughput: 62.19292244542571 estimated_peak_memory_range: - min: 6332416 - max: 16662864 + min: 6307840 + max: 19092960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jpxk3k815 + job_id: jgn6z11j5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11728.0 + throughput: 85.26603001364256 + estimated_peak_memory_range: + min: 176128 + max: 10405664 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 21 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 21 + job_id: jgo2ldw1p job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:15:24Z' + timestamp: '2024-12-11T22:37:47Z' - torchscript_onnx_tflite: - inference_time: 2709.0 - throughput: 369.139904023625 + inference_time: 2568.0 + throughput: 389.4080996884735 estimated_peak_memory_range: - min: 6328320 - max: 84689928 + min: 16384 + max: 3523552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j5mnon1wp + job_id: jprvlxxkg job_status: Passed torchscript_onnx_qnn: - inference_time: 901.0 - throughput: 1109.8779134295228 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: - min: 233472 - max: 1498744 + min: 229376 + max: 1455032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp3jxjllg + job_id: jpv6l2mz5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:15:25Z' + timestamp: '2024-12-11T22:37:48Z' - torchscript_onnx_tflite: - inference_time: 3956.0 - throughput: 252.78058645096056 + inference_time: 5678.0 + throughput: 176.11835153222967 estimated_peak_memory_range: - min: 16384 - max: 8334160 + min: 6307840 + max: 14254544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jgn6o6dr5 + job_id: jp2kroo6p job_status: Passed torchscript_onnx_qnn: - inference_time: 2212.0 - throughput: 452.07956600361666 + inference_time: 1799.0 + throughput: 555.864369093941 estimated_peak_memory_range: - min: 0 - max: 6089216 + min: 212992 + max: 6199136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgo2o27xp + job_id: jgjvr3y1g job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:15:26Z' + timestamp: '2024-12-11T22:37:49Z' - torchscript_onnx_tflite: - inference_time: 2446.0 - throughput: 408.8307440719542 + inference_time: 2484.0 + throughput: 402.5764895330113 estimated_peak_memory_range: - min: 24576 - max: 3029592 + min: 6307840 + max: 84044064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jprvovm9g + job_id: jpy1o880p job_status: Passed torchscript_onnx_qnn: - inference_time: 897.0 - throughput: 1114.8272017837235 + inference_time: 906.0 + throughput: 1103.7527593818984 estimated_peak_memory_range: - min: 217088 - max: 1556224 + min: 225280 + max: 4819192 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jpv6e6yj5 + job_id: jped76x85 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:15:27Z' + timestamp: '2024-12-11T22:37:50Z' - torchscript_onnx_tflite: - inference_time: 4206.0 - throughput: 237.75558725630052 + inference_time: 4436.0 + throughput: 225.4283137962128 estimated_peak_memory_range: - min: 6332416 - max: 14190128 + min: 6213632 + max: 18849296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jp2k4kq4p + job_id: jp0zmo905 job_status: Passed torchscript_onnx_qnn: - inference_time: 1851.0 - throughput: 540.2485143165857 + inference_time: 1860.0 + throughput: 537.6344086021505 estimated_peak_memory_range: - min: 217088 - max: 5788912 + min: 0 + max: 6303152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgjvov6xg + job_id: jgz3lzy45 job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:15:29Z' + timestamp: '2024-12-11T22:37:52Z' - torchscript_onnx_tflite: - inference_time: 3689.0 - throughput: 271.07617240444563 + inference_time: 4056.0 + throughput: 246.5483234714004 estimated_peak_memory_range: min: 6307840 - max: 19165760 + max: 18626112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jpy1q1k7p + job_id: jp8qejrqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1305.0 - throughput: 766.2835249042146 + inference_time: 1261.0 + throughput: 793.0214115781126 
estimated_peak_memory_range: min: 208896 - max: 12696160 + max: 11330768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jped8d015 + job_id: j5welyz45 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,13 +471,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:15:30Z' + timestamp: '2024-12-11T22:37:53Z' - torchscript_onnx_qnn: - inference_time: 982.0 - throughput: 1018.3299389002036 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -470,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jglvov985 + job_id: jp3jzo7mg + job_status: Passed + torchscript_onnx: + inference_time: 1504.0 + throughput: 664.8936170212766 + estimated_peak_memory_range: + min: 8826880 + max: 8826880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 23 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 23 + job_id: j57yeonn5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:15:34Z' + timestamp: '2024-12-11T22:37:57Z' diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index 00f12c99..7ba507c5 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -8,6 +8,7 @@ use_case: Super Resolution description: XLSR is designed for lightweight real-time upscaling of images. 
tags: - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index b16d1e07..2f0bb7d5 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -50,15 +50,15 @@ aggregated: models: - name: XLSR-Quantized universal_assets: - torchscript_onnx_tflite: mqkkxg77q - torchscript_onnx: mmdy89ovm + torchscript_onnx_tflite: mq36eov6q + torchscript_onnx: mm5edod2m performance_metrics: - torchscript_onnx_tflite: - inference_time: 1070.0 - throughput: 934.5794392523364 + inference_time: 1106.0 + throughput: 904.1591320072333 estimated_peak_memory_range: - min: 20480 - max: 9618336 + min: 32768 + max: 10256496 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +66,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpy1q4vlp + job_id: jp14n0k7p job_status: Passed torchscript_onnx_qnn: - inference_time: 552.0 - throughput: 1811.5942028985507 + inference_time: 556.0 + throughput: 1798.5611510791366 estimated_peak_memory_range: - min: 65536 - max: 12483768 + min: 61440 + max: 2867680 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,14 +81,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgz38xwx5 + job_id: jgke2zwng job_status: Passed torchscript_onnx: - inference_time: 4180.0 - throughput: 239.23444976076556 + inference_time: 3995.0 + throughput: 250.31289111389236 estimated_peak_memory_range: - min: 9056256 - max: 40610808 + min: 9064448 + max: 11492920 primary_compute_unit: NPU precision: int8 layer_info: @@ -96,7 +96,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 75 - job_id: jp0zd1kn5 + job_id: j57yezm95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ 
-105,13 +105,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:38:48Z' + timestamp: '2024-12-12T01:07:45Z' - torchscript_onnx_tflite: - inference_time: 892.0 - throughput: 1121.0762331838564 + inference_time: 857.0 + throughput: 1166.8611435239206 estimated_peak_memory_range: - min: 20480 - max: 13243040 + min: 16384 + max: 11679488 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,14 +119,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp0zd1en5 + job_id: jgdxdwyzp job_status: Passed torchscript_onnx_qnn: - inference_time: 387.0 - throughput: 2583.9793281653747 + inference_time: 379.0 + throughput: 2638.5224274406332 estimated_peak_memory_range: - min: 61440 - max: 12008096 + min: 65536 + max: 12243072 primary_compute_unit: NPU precision: int8 layer_info: @@ -134,14 +134,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5we8dxm5 + job_id: j5q6l8xop job_status: Passed torchscript_onnx: - inference_time: 2923.0 - throughput: 342.11426616489905 + inference_time: 2972.0 + throughput: 336.47375504710635 estimated_peak_memory_range: min: 0 - max: 36609616 + max: 36842992 primary_compute_unit: NPU precision: int8 layer_info: @@ -149,7 +149,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 75 - job_id: jp8q638op + job_id: jp4lyq715 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -158,13 +158,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:38:50Z' + timestamp: '2024-12-12T01:07:48Z' - torchscript_onnx_tflite: - inference_time: 1392.0 - throughput: 718.3908045977012 + inference_time: 1191.0 + throughput: 839.6305625524769 estimated_peak_memory_range: - min: 32768 - max: 9688144 + min: 12288 + max: 9935616 primary_compute_unit: NPU precision: int8 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp8q63wop 
+ job_id: j57yez195 job_status: Passed torchscript_onnx_qnn: - inference_time: 415.0 - throughput: 2409.6385542168673 + inference_time: 405.0 + throughput: 2469.135802469136 estimated_peak_memory_range: - min: 57344 - max: 9541952 + min: 61440 + max: 11306224 primary_compute_unit: NPU precision: int8 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jg9lk388g + job_id: jglvyndm5 job_status: Passed torchscript_onnx: - inference_time: 3087.0 - throughput: 323.9390994493035 + inference_time: 2565.0 + throughput: 389.8635477582846 estimated_peak_memory_range: min: 0 - max: 26169552 + max: 26444992 primary_compute_unit: NPU precision: int8 layer_info: @@ -202,7 +202,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 75 - job_id: j5q6z7wop + job_id: j5mn0r79p job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -211,13 +211,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:38:53Z' + timestamp: '2024-12-12T01:07:51Z' - torchscript_onnx_tflite: - inference_time: 4302.0 - throughput: 232.4500232450023 + inference_time: 2493.0 + throughput: 401.1231448054553 estimated_peak_memory_range: - min: 1585152 - max: 13560320 + min: 1617920 + max: 18819872 primary_compute_unit: NPU precision: int8 layer_info: @@ -225,14 +225,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgkeolrng + job_id: jp4lyq615 job_status: Passed torchscript_onnx_qnn: - inference_time: 1110.0 - throughput: 900.9009009009009 + inference_time: 1124.0 + throughput: 889.6797153024911 estimated_peak_memory_range: - min: 61440 - max: 8333424 + min: 12288 + max: 7301680 primary_compute_unit: NPU precision: int8 layer_info: @@ -240,7 +240,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp147d37p + job_id: j56y86xyp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -249,13 +249,13 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:38:22Z' + timestamp: '2024-12-12T01:07:20Z' - torchscript_onnx_tflite: - inference_time: 16101.0 - throughput: 62.1079436059872 + inference_time: 13458.0 + throughput: 74.3052459503641 estimated_peak_memory_range: - min: 4300800 - max: 22211024 + min: 4190208 + max: 11070072 primary_compute_unit: GPU precision: int8 layer_info: @@ -263,7 +263,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 19 - job_id: j5q6z79op + job_id: jpxklv8l5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -272,13 +272,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:37:52Z' + timestamp: '2024-12-12T01:06:49Z' - torchscript_onnx_tflite: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1067.0 + throughput: 937.207122774133 estimated_peak_memory_range: - min: 16384 - max: 86744296 + min: 815104 + max: 10991216 primary_compute_unit: NPU precision: int8 layer_info: @@ -286,14 +286,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jglvo07m5 + job_id: j5mn0r19p job_status: Passed torchscript_onnx_qnn: - inference_time: 441.0 - throughput: 2267.573696145125 + inference_time: 447.0 + throughput: 2237.136465324385 estimated_peak_memory_range: - min: 81920 - max: 1235448 + min: 77824 + max: 1714560 primary_compute_unit: NPU precision: int8 layer_info: @@ -301,7 +301,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgdx8r0zp + job_id: jp3jzkdng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -310,13 +310,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:38:25Z' + timestamp: '2024-12-12T01:07:23Z' - torchscript_onnx_tflite: - inference_time: 4367.0 - throughput: 228.9901534234028 + inference_time: 4338.0 + throughput: 230.52097740894422 estimated_peak_memory_range: - min: 1589248 - max: 
11513136 + min: 1626112 + max: 14757952 primary_compute_unit: NPU precision: int8 layer_info: @@ -324,22 +324,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: j56yr3vyp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2530.0 - throughput: 395.25691699604744 - estimated_peak_memory_range: - min: 12288 - max: 5785824 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 21 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 21 - job_id: jp4lmx815 + job_id: jgn6z2dq5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -348,13 +333,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:38:31Z' + timestamp: '2024-12-12T01:07:29Z' - torchscript_onnx_tflite: - inference_time: 1066.0 - throughput: 938.0863039399625 + inference_time: 1062.0 + throughput: 941.6195856873823 estimated_peak_memory_range: min: 16384 - max: 63405232 + max: 75054128 primary_compute_unit: NPU precision: int8 layer_info: @@ -362,14 +347,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jp3jx48ng + job_id: jprvlkm7g job_status: Passed torchscript_onnx_qnn: - inference_time: 442.0 - throughput: 2262.443438914027 + inference_time: 439.0 + throughput: 2277.904328018223 estimated_peak_memory_range: - min: 81920 - max: 1554896 + min: 86016 + max: 1406424 primary_compute_unit: NPU precision: int8 layer_info: @@ -377,7 +362,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jpxk37ml5 + job_id: jgjvrx9eg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -386,13 +371,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:38:34Z' + timestamp: '2024-12-12T01:07:31Z' - torchscript_onnx_tflite: - inference_time: 1912.0 - throughput: 523.0125523012553 + inference_time: 1895.0 + throughput: 527.7044854881267 estimated_peak_memory_range: - min: 16384 - max: 8920048 + min: 802816 + max: 
9312336 primary_compute_unit: NPU precision: int8 layer_info: @@ -400,14 +385,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgo2o1mkp + job_id: jp2kr8qqp job_status: Passed torchscript_onnx_qnn: - inference_time: 960.0 - throughput: 1041.6666666666667 + inference_time: 1124.0 + throughput: 889.6797153024911 estimated_peak_memory_range: min: 12288 - max: 5916528 + max: 5967488 primary_compute_unit: NPU precision: int8 layer_info: @@ -415,7 +400,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j5mnow49p + job_id: jped79qv5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -424,13 +409,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:38:36Z' + timestamp: '2024-12-12T01:07:34Z' - torchscript_onnx_tflite: - inference_time: 1069.0 - throughput: 935.4536950420954 + inference_time: 1079.0 + throughput: 926.7840593141798 estimated_peak_memory_range: - min: 24576 - max: 2584856 + min: 28672 + max: 2559240 primary_compute_unit: NPU precision: int8 layer_info: @@ -438,14 +423,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jpv6e14r5 + job_id: jpy1oeklp job_status: Passed torchscript_onnx_qnn: - inference_time: 443.0 - throughput: 2257.3363431151242 + inference_time: 449.0 + throughput: 2227.1714922048996 estimated_peak_memory_range: - min: 86016 - max: 1304944 + min: 81920 + max: 1277808 primary_compute_unit: NPU precision: int8 layer_info: @@ -453,7 +438,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jgn6o9xq5 + job_id: jgz3le6x5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -462,13 +447,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:38:39Z' + timestamp: '2024-12-12T01:07:37Z' - torchscript_onnx_tflite: - inference_time: 2116.0 - throughput: 472.5897920604915 + inference_time: 2122.0 + throughput: 471.25353440150803 
estimated_peak_memory_range: - min: 385024 - max: 8793728 + min: 806912 + max: 15540208 primary_compute_unit: NPU precision: int8 layer_info: @@ -476,14 +461,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jgjvo01eg + job_id: jp0zmy8n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 981.0 - throughput: 1019.367991845056 + inference_time: 991.0 + throughput: 1009.0817356205853 estimated_peak_memory_range: - min: 16384 - max: 5790928 + min: 12288 + max: 6021088 primary_compute_unit: NPU precision: int8 layer_info: @@ -491,7 +476,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jprvo497g + job_id: j5welokm5 job_status: Passed reference_device_info: name: SA8775P ADP @@ -500,13 +485,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:38:42Z' + timestamp: '2024-12-12T01:07:40Z' - torchscript_onnx_tflite: - inference_time: 1582.0 - throughput: 632.1112515802781 + inference_time: 1534.0 + throughput: 651.8904823989569 estimated_peak_memory_range: - min: 806912 - max: 13973040 + min: 16384 + max: 13334608 primary_compute_unit: NPU precision: int8 layer_info: @@ -514,14 +499,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 19 - job_id: jped8r2v5 + job_id: jp8qeodop job_status: Passed torchscript_onnx_qnn: - inference_time: 631.0 - throughput: 1584.7860538827258 + inference_time: 622.0 + throughput: 1607.717041800643 estimated_peak_memory_range: min: 65536 - max: 14216432 + max: 12200464 primary_compute_unit: NPU precision: int8 layer_info: @@ -529,7 +514,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jp2k47jqp + job_id: jgdxdwkzp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -538,13 +523,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:38:45Z' + timestamp: '2024-12-12T01:07:43Z' - torchscript_onnx_qnn: - inference_time: 573.0 - throughput: 
1745.2006980802792 + inference_time: 558.0 + throughput: 1792.1146953405018 estimated_peak_memory_range: - min: 135168 - max: 135168 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -552,14 +537,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j57ykj695 + job_id: jgo2lyxkp job_status: Passed torchscript_onnx: - inference_time: 7535.0 - throughput: 132.71400132714 + inference_time: 7666.0 + throughput: 130.44612575006522 estimated_peak_memory_range: - min: 8863744 - max: 8863744 + min: 8921088 + max: 8921088 primary_compute_unit: NPU precision: int8 layer_info: @@ -567,7 +552,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 75 - job_id: jglvo0om5 + job_id: jgn6z24q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -576,4 +561,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:38:56Z' + timestamp: '2024-12-12T01:07:54Z' diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index 6acfc7d5..3cb34884 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Yolo-NAS universal_assets: - torchscript_onnx_tflite: mqex902yn - torchscript_onnx: mqkk4zrwq + torchscript_onnx_tflite: mq2140r0m + torchscript_onnx: mqkvkowxm performance_metrics: - torchscript_onnx_tflite: - inference_time: 9086.0 - throughput: 110.0594320933304 + inference_time: 8967.0 + throughput: 111.52001784320285 estimated_peak_memory_range: - min: 225280 - max: 24136728 + min: 237568 + max: 22438040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jped8dx05 + job_id: jp14non7p job_status: Passed torchscript_onnx_qnn: - inference_time: 9690.0 - throughput: 103.19917440660474 + inference_time: 9682.0 + throughput: 103.2844453625284 
estimated_peak_memory_range: - min: 4931584 - max: 23455416 + min: 4943872 + max: 20771304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j57yky8l5 + job_id: jgn6z1zj5 job_status: Passed torchscript_onnx: - inference_time: 7750.0 - throughput: 129.03225806451613 + inference_time: 7850.0 + throughput: 127.38853503184713 estimated_peak_memory_range: - min: 28672 - max: 173789416 + min: 16384 + max: 26387888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jglvovq85 + job_id: jgo2ldd1p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:14:48Z' + timestamp: '2024-12-11T22:37:10Z' - torchscript_onnx_tflite: - inference_time: 6117.0 - throughput: 163.47882949158085 + inference_time: 6118.0 + throughput: 163.45210853220007 estimated_peak_memory_range: min: 217088 - max: 39187296 + max: 39679392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgz383y65 + job_id: jgdxd6dzp job_status: Passed torchscript_onnx_qnn: - inference_time: 6522.0 - throughput: 153.32720024532352 + inference_time: 6514.0 + throughput: 153.51550506601166 estimated_peak_memory_range: - min: 2850816 - max: 36429312 + min: 4931584 + max: 38190544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp4lml2v5 + job_id: jprvlxlkg job_status: Passed torchscript_onnx: - inference_time: 5225.0 - throughput: 191.38755980861245 + inference_time: 5289.0 + throughput: 189.07165815844206 estimated_peak_memory_range: - min: 5394432 - max: 123421488 + min: 5361664 + max: 125266080 primary_compute_unit: 
NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j56yry00p + job_id: jpv6l22z5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:14:49Z' + timestamp: '2024-12-11T22:37:11Z' - torchscript_onnx_tflite: - inference_time: 5166.0 - throughput: 193.57336430507164 + inference_time: 6265.0 + throughput: 159.6169193934557 estimated_peak_memory_range: min: 212992 - max: 36671744 + max: 38364560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j5we8ezj5 + job_id: j5welyl45 job_status: Passed torchscript_onnx_qnn: - inference_time: 6492.0 - throughput: 154.03573629081947 + inference_time: 6308.0 + throughput: 158.5288522511097 estimated_peak_memory_range: min: 4927488 - max: 34561776 + max: 36382224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpxk3kz15 + job_id: jp2kror6p job_status: Passed torchscript_onnx: - inference_time: 4318.0 - throughput: 231.5886984715146 + inference_time: 5121.0 + throughput: 195.27436047646944 estimated_peak_memory_range: - min: 0 - max: 58905264 + min: 1114112 + max: 59888192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp3jxjrlg + job_id: jgjvr331g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:14:50Z' + timestamp: '2024-12-11T22:37:12Z' - torchscript_onnx_tflite: - inference_time: 9017.0 - throughput: 110.90163025396474 + inference_time: 8958.0 + throughput: 111.63206072784104 
estimated_peak_memory_range: min: 217088 - max: 21851176 + max: 25181824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jg9lkl2vg + job_id: jg9lzozmg job_status: Passed torchscript_onnx_qnn: - inference_time: 9392.0 - throughput: 106.47359454855196 + inference_time: 9386.0 + throughput: 106.54165778819518 estimated_peak_memory_range: - min: 4947968 - max: 6179888 + min: 4976640 + max: 6197440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j5mnonlwp + job_id: jpy1o8o0p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:14:38Z' + timestamp: '2024-12-11T22:37:01Z' - torchscript_onnx_tflite: - inference_time: 223043.0 - throughput: 4.4834404128351935 + inference_time: 223031.0 + throughput: 4.483681640668786 estimated_peak_memory_range: - min: 16384 - max: 34112320 + min: 135168 + max: 34735008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jp14741lp + job_id: jp14nonnp job_status: Passed torchscript_onnx_qnn: - inference_time: 223721.0 - throughput: 4.469853075929394 + inference_time: 223657.0 + throughput: 4.471132135367997 estimated_peak_memory_range: - min: 4931584 - max: 10142768 + min: 1970176 + max: 12281392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jprvov79g + job_id: jp8qejjqp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:14:41Z' + timestamp: '2024-12-11T22:37:04Z' - torchscript_onnx_tflite: - inference_time: 
9026.0 - throughput: 110.79104808331486 + inference_time: 9029.0 + throughput: 110.75423634954036 estimated_peak_memory_range: - min: 221184 - max: 24618936 + min: 245760 + max: 21913584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgdx8x4lp + job_id: jgdxd6d6p job_status: Passed torchscript_onnx_qnn: - inference_time: 9388.0 - throughput: 106.51896037494674 + inference_time: 9349.0 + throughput: 106.96331158412664 estimated_peak_memory_range: - min: 4976640 - max: 6674504 + min: 5468160 + max: 6680880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp2k4kz4p + job_id: jgke266vg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:14:42Z' + timestamp: '2024-12-11T22:37:05Z' - torchscript_onnx_tflite: - inference_time: 14072.0 - throughput: 71.0631040363843 + inference_time: 14091.0 + throughput: 70.96728408203818 estimated_peak_memory_range: - min: 217088 - max: 33433088 + min: 237568 + max: 35239040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j5we8er65 + job_id: j57yeoen5 job_status: Passed torchscript_onnx_qnn: - inference_time: 13737.0 - throughput: 72.79609812914028 + inference_time: 14087.0 + throughput: 70.98743522396536 estimated_peak_memory_range: - min: 36864 - max: 5676944 + min: 77824 + max: 6122880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpy1q1y7p + job_id: j5q6l44ep job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: 
'2024-11-26T01:14:43Z' + timestamp: '2024-12-11T22:37:06Z' - torchscript_onnx_tflite: - inference_time: 9019.0 - throughput: 110.87703736556159 + inference_time: 9009.0 + throughput: 111.000111000111 estimated_peak_memory_range: - min: 0 - max: 23465272 + min: 217088 + max: 21868000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jg9lklqlg + job_id: jp4lyey25 job_status: Passed torchscript_onnx_qnn: - inference_time: 9389.0 - throughput: 106.50761529449356 + inference_time: 9349.0 + throughput: 106.96331158412664 estimated_peak_memory_range: - min: 4956160 - max: 6749128 + min: 4968448 + max: 6110304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp8q6qkxp + job_id: jglvyww25 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:14:44Z' + timestamp: '2024-12-11T22:37:07Z' - torchscript_onnx_tflite: - inference_time: 15662.0 - throughput: 63.84880602732729 + inference_time: 15681.0 + throughput: 63.77144314775843 estimated_peak_memory_range: - min: 217088 - max: 35272576 + min: 0 + max: 37371808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jp1474m2p + job_id: jpxkl0l85 job_status: Passed torchscript_onnx_qnn: - inference_time: 16276.0 - throughput: 61.44015728680265 + inference_time: 16310.0 + throughput: 61.31207847946045 estimated_peak_memory_range: - min: 1302528 - max: 6954912 + min: 737280 + max: 6624816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgkeoek2g + job_id: j56y8oonp job_status: Passed reference_device_info: name: SA8775P ADP @@ 
-433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:14:46Z' + timestamp: '2024-12-11T22:37:08Z' - torchscript_onnx_tflite: - inference_time: 12334.0 - throughput: 81.07669855683477 + inference_time: 12369.0 + throughput: 80.84727948904519 estimated_peak_memory_range: - min: 217088 - max: 40387536 + min: 233472 + max: 42508816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jgdx8xmep + job_id: j5mn0907p job_status: Passed torchscript_onnx_qnn: - inference_time: 12893.0 - throughput: 77.5614674629644 + inference_time: 13049.0 + throughput: 76.63422484481569 estimated_peak_memory_range: - min: 4952064 - max: 35872080 + min: 4931584 + max: 38615568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j5q6z6d4p + job_id: jp3jzoomg job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:14:47Z' + timestamp: '2024-12-11T22:37:09Z' - torchscript_onnx_qnn: - inference_time: 10404.0 - throughput: 96.11687812379854 + inference_time: 10298.0 + throughput: 97.10623422023694 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -485,7 +485,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgn6o6wr5 + job_id: jp0zmoo05 + job_status: Passed + torchscript_onnx: + inference_time: 8331.0 + throughput: 120.03360941063498 + estimated_peak_memory_range: + min: 23670784 + max: 23670784 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 290 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 290 + job_id: jped76685 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: 
Snapdragon® X Elite - timestamp: '2024-11-26T01:14:51Z' + timestamp: '2024-12-11T22:37:13Z' diff --git a/qai_hub_models/models/yolonas_quantized/info.yaml b/qai_hub_models/models/yolonas_quantized/info.yaml index e4d1c2b6..8e499520 100644 --- a/qai_hub_models/models/yolonas_quantized/info.yaml +++ b/qai_hub_models/models/yolonas_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection tags: - real-time - quantized +imsdk_supported: true research_paper: https://deci.ai/blog/yolo-nas-object-detection-foundation-model/ research_paper_title: 'YOLO-NAS by Deci Achieves SOTA Performance on Object Detection Using Neural Architecture Search' license: https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md#license diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index 25567088..b514f761 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -47,14 +47,14 @@ aggregated: models: - name: Yolo-NAS-Quantized universal_assets: - torchscript_onnx_tflite: mnwe13k3n + torchscript_onnx_tflite: mn1wz2lrm performance_metrics: - torchscript_onnx_tflite: - inference_time: 4778.0 - throughput: 209.2925910422771 + inference_time: 4693.0 + throughput: 213.08331557639036 estimated_peak_memory_range: - min: 65536 - max: 14803872 + min: 16384 + max: 24576480 primary_compute_unit: NPU precision: int8 layer_info: @@ -62,14 +62,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jgn6o9eq5 + job_id: j5weloz35 job_status: Passed torchscript_onnx_qnn: - inference_time: 4328.0 - throughput: 231.0536044362292 + inference_time: 4307.0 + throughput: 232.18017181332715 estimated_peak_memory_range: - min: 1228800 - max: 12246608 + min: 106496 + max: 13519504 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgdx8rrzp + job_id: jp3jzkrng 
job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:29:24Z' + timestamp: '2024-12-12T00:58:31Z' - torchscript_onnx_tflite: - inference_time: 3053.0 - throughput: 327.54667540124467 + inference_time: 3057.0 + throughput: 327.11808963035656 estimated_peak_memory_range: - min: 12288 - max: 32765088 + min: 32768 + max: 32069168 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,14 +100,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jp2k47mqp + job_id: jp14n018p job_status: Passed torchscript_onnx_qnn: inference_time: 2915.0 throughput: 343.0531732418525 estimated_peak_memory_range: min: 1245184 - max: 27631856 + max: 33234448 primary_compute_unit: NPU precision: int8 layer_info: @@ -115,7 +115,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp4lmxx15 + job_id: jgjvrx8eg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -124,13 +124,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:29:51Z' + timestamp: '2024-12-12T00:58:57Z' - torchscript_onnx_tflite: - inference_time: 3138.0 - throughput: 318.6743148502231 + inference_time: 3133.0 + throughput: 319.1828917969997 estimated_peak_memory_range: min: 8192 - max: 26138576 + max: 29931648 primary_compute_unit: NPU precision: int8 layer_info: @@ -138,14 +138,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jp8q637op + job_id: j5welozm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2973.0 - throughput: 336.3605785401951 + inference_time: 2447.0 + throughput: 408.6636697997548 estimated_peak_memory_range: - min: 1241088 - max: 25207856 + min: 0 + max: 29039904 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: 
j5mnoww9p + job_id: jped79nv5 job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:30:18Z' + timestamp: '2024-12-12T00:59:23Z' - torchscript_onnx_tflite: - inference_time: 14283.0 - throughput: 70.01330252748022 + inference_time: 14750.0 + throughput: 67.79661016949153 estimated_peak_memory_range: min: 81920 - max: 37919680 + max: 38696240 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,14 +176,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jgkeolyng + job_id: jp14n017p job_status: Passed torchscript_onnx_qnn: - inference_time: 15615.0 - throughput: 64.04098623118796 + inference_time: 15376.0 + throughput: 65.03642039542143 estimated_peak_memory_range: - min: 1273856 - max: 9113440 + min: 1286144 + max: 8326896 primary_compute_unit: NPU precision: int8 layer_info: @@ -191,7 +191,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp2k477qp + job_id: jgz3le0x5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -200,7 +200,7 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:30:45Z' + timestamp: '2024-12-12T00:59:48Z' - reference_device_info: name: RB5 (Proxy) os: '12' @@ -208,13 +208,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:25:51Z' + timestamp: '2024-12-12T00:55:02Z' - torchscript_onnx_tflite: - inference_time: 4781.0 - throughput: 209.16126333403054 + inference_time: 4683.0 + throughput: 213.53833013025837 estimated_peak_memory_range: - min: 24576 - max: 20831688 + min: 16384 + max: 20501368 primary_compute_unit: NPU precision: int8 layer_info: @@ -222,14 +222,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jp3jx44ng + job_id: j5mn0rk9p job_status: Passed torchscript_onnx_qnn: - inference_time: 
4051.0 - throughput: 246.85262898049865 + inference_time: 4055.0 + throughput: 246.6091245376079 estimated_peak_memory_range: - min: 1363968 - max: 2622088 + min: 1277952 + max: 2988744 primary_compute_unit: NPU precision: int8 layer_info: @@ -237,7 +237,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpy1q44lp + job_id: j5welorm5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -246,13 +246,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:31:13Z' + timestamp: '2024-12-12T01:00:16Z' - torchscript_onnx_tflite: - inference_time: 33807.0 - throughput: 29.57967284881829 + inference_time: 33757.0 + throughput: 29.623485499303847 estimated_peak_memory_range: - min: 106496 - max: 26476672 + min: 417792 + max: 27164752 primary_compute_unit: NPU precision: int8 layer_info: @@ -260,14 +260,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jgo2o11kp + job_id: jgn6z2qq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 32945.0 - throughput: 30.353619669145544 + inference_time: 33028.0 + throughput: 30.27734043841589 estimated_peak_memory_range: - min: 1257472 - max: 6779520 + min: 1249280 + max: 11593856 primary_compute_unit: NPU precision: int8 layer_info: @@ -275,7 +275,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgkeollng + job_id: jp14n0m7p job_status: Passed reference_device_info: name: SA7255P ADP @@ -284,13 +284,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:32:06Z' + timestamp: '2024-12-12T01:01:08Z' - torchscript_onnx_tflite: - inference_time: 4765.0 - throughput: 209.8635886673662 + inference_time: 4788.0 + throughput: 208.85547201336675 estimated_peak_memory_range: - min: 20480 - max: 20211688 + min: 16384 + max: 18261528 primary_compute_unit: NPU precision: int8 layer_info: @@ -298,14 +298,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
205 - job_id: jpv6e11r5 + job_id: jprvlkd7g job_status: Passed torchscript_onnx_qnn: - inference_time: 4059.0 - throughput: 246.3661000246366 + inference_time: 4063.0 + throughput: 246.1235540241201 estimated_peak_memory_range: - min: 1261568 - max: 2557832 + min: 3358720 + max: 4562192 primary_compute_unit: NPU precision: int8 layer_info: @@ -313,7 +313,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jglvo0em5 + job_id: j57yez895 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -322,13 +322,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:32:33Z' + timestamp: '2024-12-12T01:01:35Z' - torchscript_onnx_tflite: - inference_time: 6551.0 - throughput: 152.64845061822624 + inference_time: 6586.0 + throughput: 151.83723048891588 estimated_peak_memory_range: min: 65536 - max: 24947440 + max: 25114304 primary_compute_unit: NPU precision: int8 layer_info: @@ -336,14 +336,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jped8rrv5 + job_id: jp0zmy9n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6072.0 - throughput: 164.69038208168644 + inference_time: 6030.0 + throughput: 165.8374792703151 estimated_peak_memory_range: - min: 1282048 - max: 7127792 + min: 1294336 + max: 7441728 primary_compute_unit: NPU precision: int8 layer_info: @@ -351,7 +351,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: j56yr3qyp + job_id: jpxklvzl5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -360,13 +360,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:32:59Z' + timestamp: '2024-12-12T01:02:01Z' - torchscript_onnx_tflite: - inference_time: 4747.0 - throughput: 210.6593638087213 + inference_time: 4781.0 + throughput: 209.16126333403054 estimated_peak_memory_range: - min: 20480 - max: 20999864 + min: 57344 + max: 19844056 primary_compute_unit: NPU precision: int8 
layer_info: @@ -374,14 +374,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jgz38xxx5 + job_id: j5q6l81op job_status: Passed torchscript_onnx_qnn: - inference_time: 4068.0 - throughput: 245.82104228121926 + inference_time: 4062.0 + throughput: 246.18414574101428 estimated_peak_memory_range: - min: 1298432 - max: 2697472 + min: 4235264 + max: 5393920 primary_compute_unit: NPU precision: int8 layer_info: @@ -389,7 +389,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgo2o1ekp + job_id: jprvlk77g job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -398,13 +398,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:33:24Z' + timestamp: '2024-12-12T01:02:26Z' - torchscript_onnx_tflite: - inference_time: 6494.0 - throughput: 153.9882968894364 + inference_time: 6472.0 + throughput: 154.51174289245984 estimated_peak_memory_range: - min: 94208 - max: 27496432 + min: 86016 + max: 29957712 primary_compute_unit: NPU precision: int8 layer_info: @@ -412,14 +412,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jg9lk338g + job_id: jglvynqm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5553.0 - throughput: 180.08283810552854 + inference_time: 5594.0 + throughput: 178.7629603146228 estimated_peak_memory_range: - min: 1249280 - max: 6962704 + min: 1265664 + max: 7218496 primary_compute_unit: NPU precision: int8 layer_info: @@ -427,7 +427,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jpv6e1zr5 + job_id: jpy1oeylp job_status: Passed reference_device_info: name: SA8775P ADP @@ -436,13 +436,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:33:52Z' + timestamp: '2024-12-12T01:02:52Z' - torchscript_onnx_tflite: - inference_time: 5095.0 - throughput: 196.27085377821393 + inference_time: 5248.0 + throughput: 190.5487804878049 estimated_peak_memory_range: - 
min: 532480 - max: 33744144 + min: 65536 + max: 38929504 primary_compute_unit: NPU precision: int8 layer_info: @@ -450,14 +450,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jp147dd7p + job_id: j56y860yp job_status: Passed torchscript_onnx_qnn: - inference_time: 4714.0 - throughput: 212.13406873143828 + inference_time: 4693.0 + throughput: 213.08331557639036 estimated_peak_memory_range: min: 1245184 - max: 30270320 + max: 36408288 primary_compute_unit: NPU precision: int8 layer_info: @@ -465,7 +465,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jgz38xvx5 + job_id: jp0zmyxn5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -474,10 +474,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:34:17Z' + timestamp: '2024-12-12T01:03:19Z' - torchscript_onnx_qnn: - inference_time: 4509.0 - throughput: 221.77866489243735 + inference_time: 4443.0 + throughput: 225.07314877335133 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -488,7 +488,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jp8q633op + job_id: jg9lzvq8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -497,4 +497,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:31:39Z' + timestamp: '2024-12-12T01:00:42Z' diff --git a/qai_hub_models/models/yolov11_det/perf.yaml b/qai_hub_models/models/yolov11_det/perf.yaml index 406d90dd..49317765 100644 --- a/qai_hub_models/models/yolov11_det/perf.yaml +++ b/qai_hub_models/models/yolov11_det/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: YOLOv11-Detection universal_assets: - torchscript_onnx_tflite: mqpz045on - torchscript_onnx: mnwezo5kn + torchscript_onnx_tflite: mqv64eljm + torchscript_onnx: mqyv3jzvq performance_metrics: - torchscript_onnx_tflite: - inference_time: 5424.0 - throughput: 184.36578171091446 + inference_time: 
5632.0 + throughput: 177.5568181818182 estimated_peak_memory_range: - min: 225280 - max: 16414152 + min: 229376 + max: 16249384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j5q6z64mp + job_id: jpv6l2dr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5569.0 - throughput: 179.5654516071108 + inference_time: 5581.0 + throughput: 179.17935853789643 estimated_peak_memory_range: min: 4935680 - max: 21761848 + max: 22081280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jg9lklovg + job_id: jpxkl04l5 job_status: Passed torchscript_onnx: - inference_time: 6065.0 - throughput: 164.88046166529267 + inference_time: 6047.0 + throughput: 165.371258475277 estimated_peak_memory_range: - min: 4960256 - max: 10071344 + min: 4964352 + max: 9969520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jp0zdz9e5 + job_id: j56y8o8yp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:13:56Z' + timestamp: '2024-12-11T22:36:17Z' - torchscript_onnx_tflite: - inference_time: 3872.0 - throughput: 258.26446280991735 + inference_time: 3870.0 + throughput: 258.3979328165375 estimated_peak_memory_range: min: 12288 - max: 41920896 + max: 45551936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jglvovwl5 + job_id: jgjvr37eg job_status: Passed torchscript_onnx_qnn: - inference_time: 3904.0 - throughput: 256.1475409836066 + inference_time: 3886.0 + throughput: 257.3340195573855 estimated_peak_memory_range: - min: 0 - max: 51517968 + min: 4931584 + max: 60844864 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jp1474olp + job_id: j5mn09m9p job_status: Passed torchscript_onnx: - inference_time: 4130.0 - throughput: 242.13075060532688 + inference_time: 4159.0 + throughput: 240.44241404183697 estimated_peak_memory_range: - min: 5341184 - max: 129453248 + min: 5357568 + max: 130461696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jp8q6qr8p + job_id: jp3jzozng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:13:57Z' + timestamp: '2024-12-11T22:36:18Z' - torchscript_onnx_tflite: - inference_time: 3175.0 - throughput: 314.96062992125985 + inference_time: 3822.0 + throughput: 261.6431187859759 estimated_peak_memory_range: - min: 8192 - max: 66074608 + min: 12288 + max: 45542480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgo2184qp + job_id: jped76zv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3854.0 - throughput: 259.4706798131811 + inference_time: 3861.0 + throughput: 259.000259000259 estimated_peak_memory_range: min: 4927488 - max: 51300384 + max: 54475792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgdx8x6lp + job_id: jgn6z1nq5 job_status: Passed torchscript_onnx: - inference_time: 3287.0 - throughput: 304.228780042592 + inference_time: 3989.0 + throughput: 250.68939583855604 estimated_peak_memory_range: - min: 0 - max: 79858752 + min: 5353472 + max: 85155968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 376 - job_id: jgkeoe0og + job_id: jgo2ldlkp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:13:58Z' + timestamp: '2024-12-11T22:36:19Z' - torchscript_onnx_tflite: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 5452.0 + throughput: 183.41892883345562 estimated_peak_memory_range: - min: 237568 - max: 15277704 + min: 225280 + max: 15943136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jp3jxjozg + job_id: jgz3lzmx5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5389.0 - throughput: 185.56318426424198 + inference_time: 5366.0 + throughput: 186.35855385762207 estimated_peak_memory_range: - min: 4956160 - max: 6725736 + min: 4964352 + max: 6110040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j57ykynr5 + job_id: jprvlx07g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:13:47Z' + timestamp: '2024-12-11T22:36:09Z' - torchscript_onnx_tflite: - inference_time: 60009.0 - throughput: 16.664167041610426 + inference_time: 59989.0 + throughput: 16.669722782510128 estimated_peak_memory_range: - min: 274432 - max: 38967216 + min: 233472 + max: 39972032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgo2o2ddp + job_id: j5wely7m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 58954.0 - throughput: 16.96237744682295 + inference_time: 58912.0 + throughput: 16.97447039652363 estimated_peak_memory_range: - min: 1437696 - max: 6725120 + min: 1507328 + 
max: 11766960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jpxk3kr95 + job_id: jpy1o8xlp job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:13:49Z' + timestamp: '2024-12-11T22:36:11Z' - torchscript_onnx_tflite: - inference_time: 5436.0 - throughput: 183.95879323031642 + inference_time: 5519.0 + throughput: 181.19224497191522 estimated_peak_memory_range: - min: 229376 - max: 15952312 + min: 241664 + max: 20176800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jpv6e62m5 + job_id: jg9lzom8g job_status: Passed torchscript_onnx_qnn: - inference_time: 5380.0 - throughput: 185.87360594795538 + inference_time: 5423.0 + throughput: 184.39977872026554 estimated_peak_memory_range: - min: 4972544 - max: 6244032 + min: 7045120 + max: 8260752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: j5mnonkqp + job_id: jp0zmojn5 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:13:50Z' + timestamp: '2024-12-11T22:36:12Z' - torchscript_onnx_tflite: - inference_time: 10482.0 - throughput: 95.40164090822363 + inference_time: 10056.0 + throughput: 99.44311853619729 estimated_peak_memory_range: - min: 237568 - max: 61386608 + min: 233472 + max: 34652832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j5wed4435 + job_id: jp14noj7p job_status: Passed torchscript_onnx_qnn: - inference_time: 9427.0 - throughput: 106.07828577490187 + inference_time: 9461.0 + 
throughput: 105.6970721911003 estimated_peak_memory_range: - min: 610304 - max: 6298864 + min: 798720 + max: 6587776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jgn6o6qm5 + job_id: jp8qejxop job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:13:52Z' + timestamp: '2024-12-11T22:36:13Z' - torchscript_onnx_tflite: - inference_time: 5554.0 - throughput: 180.05041411595246 + inference_time: 5539.0 + throughput: 180.53800324968407 estimated_peak_memory_range: - min: 143360 - max: 20040432 + min: 237568 + max: 15864456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jped8d605 + job_id: jgdxd63zp job_status: Passed torchscript_onnx_qnn: - inference_time: 5374.0 - throughput: 186.08113137327874 + inference_time: 5468.0 + throughput: 182.882223847842 estimated_peak_memory_range: - min: 4956160 - max: 6290648 + min: 4997120 + max: 6256376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jprvovdeg + job_id: jgke264ng job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:13:53Z' + timestamp: '2024-12-11T22:36:14Z' - torchscript_onnx_tflite: - inference_time: 8213.0 - throughput: 121.75818823815902 + inference_time: 8151.0 + throughput: 122.684333210649 estimated_peak_memory_range: min: 217088 - max: 39122448 + max: 42393696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jgz383z65 + job_id: j57yeo495 + job_status: Passed + torchscript_onnx_qnn: + 
inference_time: 8055.0 + throughput: 124.14649286157666 + estimated_peak_memory_range: + min: 380928 + max: 6278528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 374 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 374 + job_id: j5q6l4yop job_status: Passed reference_device_info: name: SA8775P ADP @@ -418,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:13:54Z' + timestamp: '2024-12-11T22:36:15Z' - torchscript_onnx_tflite: - inference_time: 8825.0 - throughput: 113.31444759206799 + inference_time: 8668.0 + throughput: 115.36686663590217 estimated_peak_memory_range: - min: 217088 - max: 39719552 + min: 98304 + max: 37791584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j5we8eyj5 + job_id: jp4lye115 job_status: Passed torchscript_onnx_qnn: - inference_time: 8433.0 - throughput: 118.58176212498518 + inference_time: 8476.0 + throughput: 117.98017932987258 estimated_peak_memory_range: min: 4947968 - max: 44889472 + max: 47634288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jpy1q124p + job_id: jglvywym5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -456,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:13:55Z' + timestamp: '2024-12-11T22:36:17Z' - torchscript_onnx_qnn: - inference_time: 5942.0 - throughput: 168.29350387075058 + inference_time: 5873.0 + throughput: 170.2707304614337 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -470,11 +485,11 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jp4lml4l5 + job_id: jp2krowqp job_status: Passed torchscript_onnx: - inference_time: 6805.0 - throughput: 146.95077149155034 + inference_time: 6823.0 + throughput: 
146.5630954125751 estimated_peak_memory_range: min: 4931584 max: 4931584 @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: j5q6z61mp + job_id: jpv6l2lr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:13:59Z' + timestamp: '2024-12-11T22:36:21Z' diff --git a/qai_hub_models/models/yolov11_seg/README.md b/qai_hub_models/models/yolov11_seg/README.md new file mode 100644 index 00000000..14fc095b --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/README.md @@ -0,0 +1,63 @@ +[![Qualcomm® AI Hub Models](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/quic-logo.jpg)](../../README.md) + + +# [YOLOv11-Segmentation: Real-time object segmentation optimized for mobile and edge by Ultralytics](https://aihub.qualcomm.com/models/yolov11_seg) + +Ultralytics YOLOv11 is a machine learning model that predicts bounding boxes, segmentation masks and classes of objects in an image. + +This is based on the implementation of YOLOv11-Segmentation found [here](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/segment). This repository contains scripts for optimized on-device +export suitable to run on Qualcomm® devices. More details on model performance +accross various devices, can be found [here](https://aihub.qualcomm.com/models/yolov11_seg). + +[Sign up](https://myaccount.qualcomm.com/signup) to start using Qualcomm AI Hub and run these models on a hosted Qualcomm® device. + + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[yolov11_seg]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.yolov11_seg.demo +``` +More details on the CLI tool can be found with the `--help` option. 
See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.yolov11_seg.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + + +## License +* The license for the original implementation of YOLOv11-Segmentation can be found + [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) + + +## References +* [Ultralytics YOLOv11 Docs: Instance Segmentation](https://docs.ultralytics.com/tasks/segment/) +* [Source Model Implementation](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/segment) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). + + diff --git a/qai_hub_models/models/yolov11_seg/__init__.py b/qai_hub_models/models/yolov11_seg/__init__.py new file mode 100644 index 00000000..ccf734ad --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/__init__.py @@ -0,0 +1,10 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.yolo.app import ( # noqa: F401 + YoloSegmentationApp as App, +) + +from .model import MODEL_ID # noqa: F401 +from .model import YoloV11Segmentor as Model # noqa: F401 diff --git a/qai_hub_models/models/yolov11_seg/conftest.py b/qai_hub_models/models/yolov11_seg/conftest.py new file mode 100644 index 00000000..b0405649 --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/conftest.py @@ -0,0 +1,39 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + +import inspect + +import pytest + +from qai_hub_models.models.yolov11_seg import Model +from qai_hub_models.utils.testing import skip_clone_repo_check + + +# Instantiate the model only once for all tests. +# Mock from_pretrained to always return the initialized model. +# This speeds up tests and limits memory leaks. 
@pytest.fixture(scope="module", autouse=True)
def cached_from_pretrained():
    """
    Replace ``Model.from_pretrained`` with a memoizing wrapper for this module.

    The wrapper stores each model returned by the real ``from_pretrained``
    in a dict keyed by the string form of the call arguments, and returns the
    stored instance on later calls made with the same arguments.

    The replacement is installed through ``pytest.MonkeyPatch.context()``, so
    it is reverted when the context exits after the module's tests finish.

    Yields:
        The active ``pytest.MonkeyPatch`` instance.
    """
    with pytest.MonkeyPatch.context() as mp:
        pretrained_cache = {}
        from_pretrained = Model.from_pretrained
        sig = inspect.signature(from_pretrained)

        @skip_clone_repo_check
        def _cached_from_pretrained(*args, **kwargs):
            # Cache key is the textual form of positional + keyword arguments.
            cache_key = str(args) + str(kwargs)
            model = pretrained_cache.get(cache_key)
            # Test for None explicitly: a truthiness test would treat a cached
            # object that evaluates falsy (e.g. one defining __len__ == 0) as
            # a miss and rebuild it on every call.
            if model is None:
                model = from_pretrained(*args, **kwargs)
                pretrained_cache[cache_key] = model
            return model

        # Expose the original signature on the wrapper for code that
        # introspects from_pretrained via inspect.signature.
        _cached_from_pretrained.__signature__ = sig

        mp.setattr(Model, "from_pretrained", _cached_from_pretrained)
        yield mp
def main(is_test: bool = False):
    """
    Run the shared YOLO segmentation demo for YoloV11Segmentor.

    Parameters:
        is_test: Forwarded unchanged to `yolo_segmentation_demo`.
    """
    # Model class, model id and input image asset, in the positional order
    # expected by the shared demo entry point.
    demo_args = (YoloV11Segmentor, MODEL_ID, IMAGE_ADDRESS)
    yolo_segmentation_demo(*demo_args, is_test=is_test)
def export_model(
    device: Optional[str] = None,
    chipset: Optional[str] = None,
    skip_profiling: bool = False,
    skip_inferencing: bool = False,
    skip_downloading: bool = False,
    skip_summary: bool = False,
    output_dir: Optional[str] = None,
    target_runtime: TargetRuntime = TargetRuntime.TFLITE,
    compile_options: str = "",
    profile_options: str = "",
    **additional_model_kwargs,
) -> ExportResult | list[str]:
    """
    This function executes the following recipe:

        1. Instantiates a PyTorch model and converts it to a traced TorchScript format
        2. Compiles the model to an asset that can be run on device
        3. Profiles the model performance on a real device
        4. Inferences the model on sample inputs
        5. Downloads the model asset to the local directory
        6. Summarizes the results from profiling and inference

    Each of the last 4 steps can be optionally skipped using the input options.

    If Qualcomm AI Hub cannot be accessed (`can_access_qualcomm_ai_hub()` is
    false), none of the steps above run; the call is delegated to
    `export_without_hub_access` and its result is returned instead.

    Parameters:
        device: Name of the device for which to export the model.
            Full list of available devices can be found by running `hub.get_devices()`.
            There is no default: at least one of `device` / `chipset` is required.
        chipset: If set, it is passed to `hub.Device` as the attribute
            `chipset:<chipset>` together with `device` (or an empty name).
            NOTE(review): earlier documentation said this picks a random device
            with the chipset and overrides `device`; confirm against `hub.Device`.
        skip_profiling: If set, skips profiling of compiled model on real devices.
        skip_inferencing: If set, skips computing on-device outputs from sample data.
        skip_downloading: If set, skips downloading of compiled model.
        skip_summary: If set, skips waiting for and summarizing results
            from profiling and inference.
        output_dir: Directory to store generated assets (e.g. compiled model).
            Defaults to `<cwd>/build/<model_name>`.
        target_runtime: Which on-device runtime to target. Default is TFLite.
        compile_options: Additional options to pass when submitting the compile job.
        profile_options: Additional options to pass when submitting the profile job.
            The same resolved options are also used for the inference job.
        **additional_model_kwargs: Additional optional kwargs used to customize
            `model_cls.from_pretrained` and `model.get_input_spec`

    Returns:
        When hub is accessible, an `ExportResult` holding:
            * A CompileJob object containing metadata about the compile job submitted to hub.
            * An InferenceJob containing metadata about the inference job (None if inferencing skipped).
            * A ProfileJob containing metadata about the profile job (None if profiling skipped).
        Otherwise, whatever `export_without_hub_access` returns.

    Raises:
        ValueError: If neither `device` nor `chipset` is provided.
        AssertionError: If the summary is requested and the profile or
            inference job did not finish successfully.
    """
    model_name = "yolov11_seg"
    output_path = Path(output_dir or Path.cwd() / "build" / model_name)
    if not device and not chipset:
        raise ValueError("Device or Chipset must be provided.")
    hub_device = hub.Device(
        name=device or "", attributes=f"chipset:{chipset}" if chipset else None
    )
    if not can_access_qualcomm_ai_hub():
        return export_without_hub_access(
            "yolov11_seg",
            "YOLOv11-Segmentation",
            device or f"Device (Chipset {chipset})",
            skip_profiling,
            skip_inferencing,
            skip_downloading,
            skip_summary,
            output_path,
            target_runtime,
            compile_options,
            profile_options,
        )

    # On-device perf improves with I/O in channel_last format for runtimes
    # that execute natively in channel_last format.
    use_channel_last_format = target_runtime.channel_last_native_execution

    # 1. Instantiates a PyTorch model and converts it to a traced TorchScript format
    model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs))
    input_spec = model.get_input_spec(
        **get_input_spec_kwargs(model, additional_model_kwargs)
    )

    # Trace the model
    source_model = torch.jit.trace(
        model.to("cpu"), make_torch_inputs(input_spec), check_trace=False
    )

    # 2. Compiles the model to an asset that can be run on device
    model_compile_options = model.get_hub_compile_options(
        target_runtime, compile_options, hub_device
    )
    print(f"Optimizing model {model_name} to run on-device")
    submitted_compile_job = hub.submit_compile_job(
        model=source_model,
        input_specs=input_spec,
        device=hub_device,
        name=model_name,
        options=model_compile_options,
    )
    compile_job = cast(hub.client.CompileJob, submitted_compile_job)

    # 3. Profiles the model performance on a real device
    profile_job: Optional[hub.client.ProfileJob] = None
    if not skip_profiling:
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(f"Profiling model {model_name} on a hosted device.")
        submitted_profile_job = hub.submit_profile_job(
            model=compile_job.get_target_model(),
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        profile_job = cast(hub.client.ProfileJob, submitted_profile_job)

    # 4. Inferences the model on sample inputs
    inference_job: Optional[hub.client.InferenceJob] = None
    if not skip_inferencing:
        # Recomputed here because step 3 may have been skipped; the inference
        # job is submitted with the profile options.
        profile_options_all = model.get_hub_profile_options(
            target_runtime, profile_options
        )
        print(
            f"Running inference for {model_name} on a hosted device with example inputs."
        )
        sample_inputs = model.sample_inputs(
            input_spec, use_channel_last_format=use_channel_last_format
        )
        submitted_inference_job = hub.submit_inference_job(
            model=compile_job.get_target_model(),
            inputs=sample_inputs,
            device=hub_device,
            name=model_name,
            options=profile_options_all,
        )
        inference_job = cast(hub.client.InferenceJob, submitted_inference_job)

    # 5. Downloads the model asset to the local directory
    if not skip_downloading:
        os.makedirs(output_path, exist_ok=True)
        target_model: hub.Model = compile_job.get_target_model()  # type: ignore
        target_model.download(str(output_path / model_name))

    # 6. Summarizes the results from profiling and inference
    if not skip_summary and not skip_profiling:
        # NOTE(review): assert statements are removed under `python -O`, which
        # would also skip the wait() call made inside this assertion.
        assert profile_job is not None and profile_job.wait().success
        profile_data: dict[str, Any] = profile_job.download_profile()  # type: ignore
        print_profile_metrics_from_job(profile_job, profile_data)

    if not skip_summary and not skip_inferencing:
        # Outputs of the PyTorch model on default sample inputs; presumably the
        # reference that print_inference_metrics compares device outputs against.
        sample_inputs = model.sample_inputs(use_channel_last_format=False)
        torch_out = torch_inference(
            model, sample_inputs, return_channel_last_output=use_channel_last_format
        )
        assert inference_job is not None and inference_job.wait().success
        inference_result: hub.client.DatasetEntries = inference_job.download_output_data()  # type: ignore

        print_inference_metrics(
            inference_job,
            inference_result,
            torch_out,
            model.get_output_names(),
            # Output index 3 is excluded from the metrics — presumably the
            # class-index output; confirm against Model.get_output_names().
            outputs_to_skip=[3],
        )

    if not skip_summary:
        print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device)

    return ExportResult(
        compile_job=compile_job,
        inference_job=inference_job,
        profile_job=profile_job,
    )


def main() -> None:
    """Parse command-line arguments with `export_parser` and run `export_model`."""
    # All warnings are silenced for the duration of the CLI run.
    warnings.filterwarnings("ignore")
    parser = export_parser(model_cls=Model, supports_qnn=False)
    args = parser.parse_args()
    export_model(**vars(args))


if __name__ == "__main__":
    main()
b/qai_hub_models/models/yolov11_seg/info.yaml new file mode 100644 index 00000000..f22c5d05 --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/info.yaml @@ -0,0 +1,44 @@ +name: YOLOv11-Segmentation +# id must match with the model dir name in qai_hub_models +id: yolov11_seg +status: public +headline: Real-time object segmentation optimized for mobile and edge by Ultralytics. +domain: Computer Vision +use_case: Semantic Segmentation +description: Ultralytics YOLOv11 is a machine learning model that predicts bounding boxes, segmentation + masks and classes of objects in an image. +tags: + - real-time +research_paper: https://docs.ultralytics.com/tasks/segment/ +research_paper_title: "Ultralytics YOLOv11 Docs: Instance Segmentation" +license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE +deploy_license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE +source_repo: + https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/yolo/segment +technical_details: + Model checkpoint: YOLO11N-Seg + Input resolution: 640x640 + Number of parameters: 2.9M + Model size: 11.1 MB + Number of output classes: 80 +applicable_scenarios: + - Factory Automation + - Robotic Navigation + - Camera +related_models: + - unet_segmentation + - sam + - fastsam_x + - mediapipe_selfie + - yolov8_seg + - ddrnet23_slim +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: true +has_animated_banner: true +license_type: agpl-3.0 +deploy_license_type: agpl-3.0 +dataset: [] diff --git a/qai_hub_models/models/yolov11_seg/model.py b/qai_hub_models/models/yolov11_seg/model.py new file mode 100644 index 00000000..40522b0d --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/model.py @@ -0,0 +1,126 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
class YoloV11Segmentor(BaseModel):
    """End-to-end exportable wrapper around an Ultralytics YoloV11 segmentation network."""

    def __init__(self, model: nn.Module) -> None:
        super().__init__()
        self.model = model

    @classmethod
    def from_pretrained(cls, ckpt_name: str = DEFAULT_WEIGHTS):
        """
        Build the wrapper from one of the checkpoints named in SUPPORTED_WEIGHTS.

        The checkpoint is loaded with the `ultralytics` package imported inside
        the `SourceAsRoot` context for SOURCE_REPO at SOURCE_REPO_COMMIT.

        Raises:
            ValueError: If `ckpt_name` is not listed in SUPPORTED_WEIGHTS.
        """
        if ckpt_name not in SUPPORTED_WEIGHTS:
            message = (
                f"Unsupported checkpoint name provided {ckpt_name}.\n"
                f"Supported checkpoints are {list(SUPPORTED_WEIGHTS)}."
            )
            raise ValueError(message)

        with SourceAsRoot(
            SOURCE_REPO, SOURCE_REPO_COMMIT, MODEL_ID, MODEL_ASSET_VERSION
        ):
            import ultralytics

            # NOTE(review): presumably clears already-imported ultralytics modules
            # so the import below resolves inside this context — confirm against
            # wipe_sys_modules.
            wipe_sys_modules(ultralytics)
            from ultralytics import YOLO

            network = YOLO(ckpt_name).model
            assert isinstance(network, torch.nn.Module)

        return cls(network)

    def forward(self, image: torch.Tensor):
        """
        Run YoloV11 on `image` and return post-processed detections plus mask prototypes.

        Parameters:
            image: Pixel values pre-processed for encoder consumption.
                   Range: float[0, 1]
                   3-channel Color Space: RGB

        Returns:
            boxes: torch.Tensor
                Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2)
            scores: torch.Tensor
                Class scores multiplied by confidence: Shape is [batch, num_preds]
            masks: torch.Tensor
                Predicted masks: Shape is [batch, num_preds, 32]
            classes: torch.Tensor
                Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction.
            protos: torch.Tensor
                Tensor of shape[batch, 32, mask_h, mask_w]
                Multiply masks and protos to generate output masks.
        """
        raw_outputs = self.model(image)
        det_boxes, det_scores, mask_coeffs, class_ids = yolo_segment_postprocess(
            raw_outputs[0], NUM_ClASSES
        )
        # The prototypes are the last entry of the network's second output.
        mask_protos = raw_outputs[1][-1]
        return det_boxes, det_scores, mask_coeffs, class_ids, mask_protos

    @staticmethod
    def get_input_spec(
        batch_size: int = 1,
        height: int = 640,
        width: int = 640,
    ) -> InputSpec:
        """
        Returns the input specification (name -> (shape, type)). This can be
        used to submit profiling job on Qualcomm AI Hub.
        """
        image_shape = (batch_size, 3, height, width)
        return {"image": (image_shape, "float32")}

    @staticmethod
    def get_output_names() -> list[str]:
        """Names of the five forward() outputs, in return order."""
        return ["boxes", "scores", "masks", "class_idx", "protos"]

    @staticmethod
    def get_channel_last_inputs() -> list[str]:
        """Names of the inputs reported as channel-last capable."""
        return ["image"]

    def get_hub_profile_options(
        self, target_runtime: TargetRuntime, other_profile_options: str = ""
    ) -> str:
        """
        Accuracy on ONNX runtime is not regained in NPU
        Issue: https://github.com/qcom-ai-hub/tetracode/issues/13108

        For the ONNX runtime, ` --compute_unit cpu` is appended unless the
        options already contain a `--compute_unit` setting.
        """
        options = super().get_hub_profile_options(
            target_runtime, other_profile_options
        )
        if target_runtime != TargetRuntime.ONNX or "--compute_unit" in options:
            return options
        return options + " --compute_unit cpu"
QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - SA8775P ADP + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA7255P ADP + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - SA8775P + - QCS8450 Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy +models: +- name: YOLOv11-Segmentation + universal_assets: + torchscript_onnx_tflite: mnz1vjlxq + torchscript_onnx: mqkvko2wm + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 6702.0 + throughput: 149.20919128618323 + estimated_peak_memory_range: + min: 4259840 + max: 24283976 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jped76yv5 + job_status: Passed + torchscript_onnx: + inference_time: 92665.0 + throughput: 10.791560999298548 + estimated_peak_memory_range: + min: 93134848 + max: 108129400 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 303 + total_layers: 303 + job_id: jp0zmo6n5 + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:35:31Z' + - torchscript_onnx_tflite: + inference_time: 4834.0 + throughput: 206.8680182043856 + estimated_peak_memory_range: + 
min: 12288 + max: 58532432 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jgz3lznx5 + job_status: Passed + torchscript_onnx: + inference_time: 84448.0 + throughput: 11.841606669192876 + estimated_peak_memory_range: + min: 103317504 + max: 122308960 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 303 + total_layers: 303 + job_id: jp8qej1op + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:35:32Z' + - torchscript_onnx_tflite: + inference_time: 4654.0 + throughput: 214.86892995272885 + estimated_peak_memory_range: + min: 12288 + max: 54453888 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: j5wely4m5 + job_status: Passed + torchscript_onnx: + inference_time: 89230.0 + throughput: 11.20699316373417 + estimated_peak_memory_range: + min: 102162432 + max: 111049760 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 303 + total_layers: 303 + job_id: jgke268ng + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:35:33Z' + - torchscript_onnx_tflite: + inference_time: 6597.0 + throughput: 151.58405335758678 + estimated_peak_memory_range: + min: 4571136 + max: 33037904 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jg9lzod8g + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: 
Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:35:17Z' + - torchscript_onnx_tflite: + inference_time: 81084.0 + throughput: 12.332889349316758 + estimated_peak_memory_range: + min: 4669440 + max: 55719600 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jp14no67p + job_status: Passed + reference_device_info: + name: SA7255P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA7255P + timestamp: '2024-12-11T22:35:18Z' + - torchscript_onnx_tflite: + inference_time: 6665.0 + throughput: 150.03750937734435 + estimated_peak_memory_range: + min: 6316032 + max: 26487880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jgdxd62zp + job_status: Passed + reference_device_info: + name: SA8255 (Proxy) + os: '13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8255P Proxy + timestamp: '2024-12-11T22:35:19Z' + - torchscript_onnx_tflite: + inference_time: 11722.0 + throughput: 85.30967411704488 + estimated_peak_memory_range: + min: 4677632 + max: 43695328 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: j57yeo995 + job_status: Passed + reference_device_info: + name: SA8295P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8295P + timestamp: '2024-12-11T22:35:20Z' + - torchscript_onnx_tflite: + inference_time: 6623.0 + throughput: 150.98897780462028 + estimated_peak_memory_range: + min: 4210688 + max: 31211136 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jp4lye315 + job_status: Passed + reference_device_info: + name: SA8650 (Proxy) + os: 
'13' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8650P Proxy + timestamp: '2024-12-11T22:35:21Z' + - torchscript_onnx_tflite: + inference_time: 10013.0 + throughput: 99.87016878058525 + estimated_peak_memory_range: + min: 4591616 + max: 56874880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: jpxkl0xl5 + job_status: Passed + reference_device_info: + name: SA8775P ADP + os: '14' + form_factor: Auto + os_name: Android + manufacturer: Qualcomm + chipset: SA8775P + timestamp: '2024-12-11T22:35:22Z' + - torchscript_onnx_tflite: + inference_time: 10333.0 + throughput: 96.77731539727088 + estimated_peak_memory_range: + min: 4583424 + max: 47315840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 429 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 429 + job_id: j5mn0989p + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:35:23Z' + - torchscript_onnx: + inference_time: 32530.0 + throughput: 30.740854595757764 + estimated_peak_memory_range: + min: 122179584 + max: 122179584 + primary_compute_unit: CPU + precision: fp32 + layer_info: + layers_on_npu: 0 + layers_on_gpu: 0 + layers_on_cpu: 303 + total_layers: 303 + job_id: j5q6l4vop + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:35:34Z' diff --git a/qai_hub_models/models/yolov11_seg/requirements.txt b/qai_hub_models/models/yolov11_seg/requirements.txt new file mode 100644 index 00000000..94980b0d --- /dev/null +++ b/qai_hub_models/models/yolov11_seg/requirements.txt @@ -0,0 +1,3 @@ +seaborn==0.11.0 +thop==0.1.1.post2209072238 +ultralytics==8.0.193 diff 
@skip_clone_repo_check
def test_task():
    """Verify that raw (numeric) outputs of both (QAIHM and non-qaihm) networks are the same."""
    qaihm_model = YoloV11Segmentor.from_pretrained(WEIGHTS)
    qaihm_app = YoloSegmentationApp(qaihm_model)
    source_model = ultralytics_YOLO(WEIGHTS).model

    processed_sample_image = preprocess_PIL_image(load_image(IMAGE_ADDRESS))
    processed_sample_image = qaihm_app.preprocess_input(processed_sample_image)

    with torch.no_grad():
        # original model output: post-processed detections followed by the
        # prototype tensor, mirroring what YoloV11Segmentor.forward returns
        source_raw = source_model(processed_sample_image)
        source_out = [
            *yolo_segment_postprocess(source_raw[0], NUM_ClASSES),
            source_raw[1][-1],
        ]

        # Qualcomm AI Hub Model output
        qaihm_out = qaihm_model(processed_sample_image)

        # Compare every output, including the prototypes. Previously the loop
        # ran only over the post-processed detections, so the prototype tensor
        # was assembled but never checked.
        assert len(source_out) == len(qaihm_out)
        for expected, actual in zip(source_out, qaihm_out):
            assert np.allclose(expected, actual)


@skip_clone_repo_check
@pytest.mark.trace
def test_trace():
    """Trace the model, run the app on the sample image, and compare with the stored expected image."""
    net = YoloV11Segmentor.from_pretrained(WEIGHTS)
    input_spec = net.get_input_spec()
    trace = net.convert_to_torchscript(input_spec, check_trace=False)

    # Collect output via app for traced model
    img = load_image(IMAGE_ADDRESS)
    app = YoloSegmentationApp(trace)
    out_imgs = app.predict(img)

    expected_out = load_image(OUTPUT_IMAGE_ADDRESS)
    assert_most_close(
        np.asarray(out_imgs[0], dtype=np.float32),
        np.asarray(expected_out, dtype=np.float32),
        0.005,
        rtol=0.02,
        atol=1.5,
    )


@skip_clone_repo_check
def test_demo():
    """Smoke test for the demo entry point."""
    # Run demo and verify it does not crash
    demo_main(is_test=True)
+ + + + +## Example & Usage + +Install the package via pip: +```bash +pip install "qai_hub_models[yolov3]" +``` + + +Once installed, run the following simple CLI demo: + +```bash +python -m qai_hub_models.models.yolov3.demo +``` +More details on the CLI tool can be found with the `--help` option. See +[demo.py](demo.py) for sample usage of the model including pre/post processing +scripts. Please refer to our [general instructions on using +models](../../../#getting-started) for more usage instructions. + +## Export for on-device deployment + +This repository contains export scripts that produce a model optimized for +on-device deployment. This can be run as follows: + +```bash +python -m qai_hub_models.models.yolov3.export +``` +Additional options are documented with the `--help` option. Note that the above +script requires access to Deployment instructions for Qualcomm® AI Hub. + + +## License +* The license for the original implementation of Yolo-v3 can be found + [here](https://github.com/ultralytics/yolov3/blob/v8/LICENSE). +* The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/yolov3/blob/v8/LICENSE) + + +## References +* [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767) +* [Source Model Implementation](https://github.com/ultralytics/yolov3/tree/v8) + + + +## Community +* Join [our AI Hub Slack community](https://aihub.qualcomm.com/community/slack) to collaborate, post questions and learn more about on-device AI. +* For questions or feedback please [reach out to us](mailto:ai-hub-support@qti.qualcomm.com). 
+ + diff --git a/qai_hub_models/models/whisper_small_en/__init__.py b/qai_hub_models/models/yolov3/__init__.py similarity index 67% rename from qai_hub_models/models/whisper_small_en/__init__.py rename to qai_hub_models/models/yolov3/__init__.py index ac4b17de..30b00e91 100644 --- a/qai_hub_models/models/whisper_small_en/__init__.py +++ b/qai_hub_models/models/yolov3/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from qai_hub_models.models._shared.whisper.app import WhisperApp as App # noqa: F401 +from qai_hub_models.models.yolov3.app import YoloV3DetectionApp as App # noqa: F401 from .model import MODEL_ID # noqa: F401 -from .model import WhisperSmallEn as Model # noqa: F401 +from .model import YoloV3 as Model # noqa: F401 diff --git a/qai_hub_models/models/yolov3/app.py b/qai_hub_models/models/yolov3/app.py new file mode 100644 index 00000000..3b2db63d --- /dev/null +++ b/qai_hub_models/models/yolov3/app.py @@ -0,0 +1,50 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +import torch + +from qai_hub_models.models._shared.yolo.app import YoloObjectDetectionApp +from qai_hub_models.models._shared.yolo.utils import detect_postprocess +from qai_hub_models.models.yolov3.model import YoloV3 + + +class YoloV3DetectionApp(YoloObjectDetectionApp): + def check_image_size(self, pixel_values: torch.Tensor) -> None: + """ + Verify image size is a valid model input. Image size should be shape + [batch_size, num_channels, height, width], where height and width are multiples + of `YoloV3.STRIDE_MULTIPLE`.
+ """ + if len(pixel_values.shape) != 4: + raise ValueError("Pixel Values must be rank 4: [batch, channels, x, y]") + + if ( + pixel_values.shape[2] % YoloV3.STRIDE_MULTIPLE != 0 + or pixel_values.shape[3] % YoloV3.STRIDE_MULTIPLE != 0 + ): + raise ValueError( + f"Pixel values must have spatial dimensions (H & W) that are multiples of {YoloV3.STRIDE_MULTIPLE}." + ) + + def pre_nms_postprocess( + self, *predictions: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Process the output of the YOLO detector for input to NMS. + + Parameters: + predictions: torch.Tensor + The output tensor(s) of the Yolo detection model; they are concatenated along the last dimension before postprocessing. Tensor shape varies by model implementation. + + Returns: + boxes: torch.Tensor + Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) + scores: torch.Tensor + class scores multiplied by confidence: Shape is [batch, num_preds] + class_idx: torch.Tensor + Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. + """ + return detect_postprocess(torch.cat(predictions, -1)) diff --git a/qai_hub_models/models/whisper_small_en/conftest.py b/qai_hub_models/models/yolov3/conftest.py similarity index 95% rename from qai_hub_models/models/whisper_small_en/conftest.py rename to qai_hub_models/models/yolov3/conftest.py index 13e24336..9ade62a2 100644 --- a/qai_hub_models/models/whisper_small_en/conftest.py +++ b/qai_hub_models/models/yolov3/conftest.py @@ -8,7 +8,7 @@ import pytest -from qai_hub_models.models.whisper_small_en import Model +from qai_hub_models.models.yolov3 import Model # Instantiate the model only once for all tests. 
diff --git a/qai_hub_models/models/yolov3/demo.py b/qai_hub_models/models/yolov3/demo.py new file mode 100644 index 00000000..112b80b4 --- /dev/null +++ b/qai_hub_models/models/yolov3/demo.py @@ -0,0 +1,33 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc.
All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from qai_hub_models.models._shared.yolo.demo import yolo_detection_demo +from qai_hub_models.models.yolov3.app import YoloV3DetectionApp +from qai_hub_models.models.yolov3.model import MODEL_ASSET_VERSION, MODEL_ID, YoloV3 +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset + +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "yolov3_demo_640.jpg" +) +print(IMAGE_ADDRESS) + + +def main(is_test: bool = False): + yolo_detection_demo( + YoloV3, + MODEL_ID, + YoloV3DetectionApp, + IMAGE_ADDRESS, + YoloV3.STRIDE_MULTIPLE, + is_test=is_test, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov3/evaluate.py b/qai_hub_models/models/yolov3/evaluate.py new file mode 100644 index 00000000..9f25c9dd --- /dev/null +++ b/qai_hub_models/models/yolov3/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.yolov3 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["coco"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=250, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov3/export.py b/qai_hub_models/models/yolov3/export.py new file mode 100644 index 00000000..2dc04696 --- /dev/null +++ b/qai_hub_models/models/yolov3/export.py @@ -0,0 +1,221 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import os +import warnings +from pathlib import Path +from typing import Any, Optional, cast + +import qai_hub as hub +import torch + +from qai_hub_models.models.common import ExportResult, TargetRuntime +from qai_hub_models.models.yolov3 import Model +from qai_hub_models.utils.args import ( + export_parser, + get_input_spec_kwargs, + get_model_kwargs, +) +from qai_hub_models.utils.compare import torch_inference +from qai_hub_models.utils.input_spec import make_torch_inputs +from qai_hub_models.utils.printing import ( + print_inference_metrics, + print_on_target_demo_cmd, + print_profile_metrics_from_job, +) +from qai_hub_models.utils.qai_hub_helpers import ( + can_access_qualcomm_ai_hub, + export_without_hub_access, +) + + +def export_model( + device: Optional[str] = None, + chipset: Optional[str] = None, + skip_profiling: bool = False, + skip_inferencing: bool = False, + skip_downloading: bool = False, + skip_summary: bool = False, + output_dir: Optional[str] = None, + target_runtime: TargetRuntime = TargetRuntime.TFLITE, + compile_options: str = "", + profile_options: str = "", + **additional_model_kwargs, +) -> ExportResult | list[str]: + """ + This function executes the following recipe: + + 1. Instantiates a PyTorch model and converts it to a traced TorchScript format + 2. Compiles the model to an asset that can be run on device + 3. Profiles the model performance on a real device + 4. Inferences the model on sample inputs + 5. Downloads the model asset to the local directory + 6. Summarizes the results from profiling and inference + + Each of the last 4 steps can be optionally skipped using the input options. + + Parameters: + device: Device for which to export the model. + Full list of available devices can be found by running `hub.get_devices()`. + Defaults to DEFAULT_DEVICE if not specified. + chipset: If set, will choose a random device with this chipset. + Overrides the `device` argument. 
+ skip_profiling: If set, skips profiling of compiled model on real devices. + skip_inferencing: If set, skips computing on-device outputs from sample data. + skip_downloading: If set, skips downloading of compiled model. + skip_summary: If set, skips waiting for and summarizing results + from profiling and inference. + output_dir: Directory to store generated assets (e.g. compiled model). + Defaults to `/build/`. + target_runtime: Which on-device runtime to target. Default is TFLite. + compile_options: Additional options to pass when submitting the compile job. + profile_options: Additional options to pass when submitting the profile job. + **additional_model_kwargs: Additional optional kwargs used to customize + `model_cls.from_pretrained` and `model.get_input_spec` + + Returns: + A struct of: + * A CompileJob object containing metadata about the compile job submitted to hub. + * An InferenceJob containing metadata about the inference job (None if inferencing skipped). + * A ProfileJob containing metadata about the profile job (None if profiling skipped). + """ + model_name = "yolov3" + output_path = Path(output_dir or Path.cwd() / "build" / model_name) + if not device and not chipset: + raise ValueError("Device or Chipset must be provided.") + hub_device = hub.Device( + name=device or "", attributes=f"chipset:{chipset}" if chipset else None + ) + if not can_access_qualcomm_ai_hub(): + return export_without_hub_access( + "yolov3", + "Yolo-v3", + device or f"Device (Chipset {chipset})", + skip_profiling, + skip_inferencing, + skip_downloading, + skip_summary, + output_path, + target_runtime, + compile_options, + profile_options, + ) + + # On-device perf improves with I/O in channel_last format for runtimes + # that execute natively in channel_last format. + use_channel_last_format = target_runtime.channel_last_native_execution + + # 1. 
Instantiates a PyTorch model and converts it to a traced TorchScript format + model = Model.from_pretrained(**get_model_kwargs(Model, additional_model_kwargs)) + input_spec = model.get_input_spec( + **get_input_spec_kwargs(model, additional_model_kwargs) + ) + + # Trace the model + source_model = torch.jit.trace( + model.to("cpu"), make_torch_inputs(input_spec), check_trace=False + ) + + # 2. Compiles the model to an asset that can be run on device + model_compile_options = model.get_hub_compile_options( + target_runtime, compile_options, hub_device + ) + print(f"Optimizing model {model_name} to run on-device") + submitted_compile_job = hub.submit_compile_job( + model=source_model, + input_specs=input_spec, + device=hub_device, + name=model_name, + options=model_compile_options, + ) + compile_job = cast(hub.client.CompileJob, submitted_compile_job) + + # 3. Profiles the model performance on a real device + profile_job: Optional[hub.client.ProfileJob] = None + if not skip_profiling: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print(f"Profiling model {model_name} on a hosted device.") + submitted_profile_job = hub.submit_profile_job( + model=compile_job.get_target_model(), + device=hub_device, + name=model_name, + options=profile_options_all, + ) + profile_job = cast(hub.client.ProfileJob, submitted_profile_job) + + # 4. Inferences the model on sample inputs + inference_job: Optional[hub.client.InferenceJob] = None + if not skip_inferencing: + profile_options_all = model.get_hub_profile_options( + target_runtime, profile_options + ) + print( + f"Running inference for {model_name} on a hosted device with example inputs." 
+ ) + sample_inputs = model.sample_inputs( + input_spec, use_channel_last_format=use_channel_last_format + ) + submitted_inference_job = hub.submit_inference_job( + model=compile_job.get_target_model(), + inputs=sample_inputs, + device=hub_device, + name=model_name, + options=profile_options_all, + ) + inference_job = cast(hub.client.InferenceJob, submitted_inference_job) + + # 5. Downloads the model asset to the local directory + if not skip_downloading: + os.makedirs(output_path, exist_ok=True) + target_model: hub.Model = compile_job.get_target_model() # type: ignore + target_model.download(str(output_path / model_name)) + + # 6. Summarizes the results from profiling and inference + if not skip_summary and not skip_profiling: + assert profile_job is not None and profile_job.wait().success + profile_data: dict[str, Any] = profile_job.download_profile() # type: ignore + print_profile_metrics_from_job(profile_job, profile_data) + + if not skip_summary and not skip_inferencing: + sample_inputs = model.sample_inputs(use_channel_last_format=False) + torch_out = torch_inference( + model, sample_inputs, return_channel_last_output=use_channel_last_format + ) + assert inference_job is not None and inference_job.wait().success + inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + + print_inference_metrics( + inference_job, + inference_result, + torch_out, + model.get_output_names(), + outputs_to_skip=[2], + ) + + if not skip_summary: + print_on_target_demo_cmd(compile_job, Path(__file__).parent, hub_device) + + return ExportResult( + compile_job=compile_job, + inference_job=inference_job, + profile_job=profile_job, + ) + + +def main(): + warnings.filterwarnings("ignore") + parser = export_parser(model_cls=Model) + args = parser.parse_args() + export_model(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/yolov3/info.yaml b/qai_hub_models/models/yolov3/info.yaml new file mode 100644 index 
00000000..dcc67cf9 --- /dev/null +++ b/qai_hub_models/models/yolov3/info.yaml @@ -0,0 +1,42 @@ +name: Yolo-v3 +# id must match with the model dir name in qai_hub_models +id: yolov3 +status: public +private_perf_form_factors: + - Auto +headline: Real-time object detection optimized for mobile and edge. +domain: Computer Vision +description: YoloV3 is a machine learning model that predicts bounding boxes and classes + of objects in an image. +use_case: Object Detection +tags: + - real-time +research_paper: https://arxiv.org/abs/1804.02767 +research_paper_title: "YOLOv3: An Incremental Improvement" +license: https://github.com/ultralytics/yolov3/blob/v8/LICENSE +deploy_license: https://github.com/ultralytics/yolov3/blob/v8/LICENSE +source_repo: https://github.com/ultralytics/yolov3/tree/v8 +technical_details: + Model checkpoint: YoloV3 Tiny + Input resolution: 416p (416x416) + Number of parameters: 8.85M + Model size: 24.4 MB +applicable_scenarios: + - Factory Automation + - Robotic Navigation + - Camera +related_models: + - yolov6 + - yolov8_det + - yolov11_det +form_factors: + - Phone + - Tablet + - IoT + - XR +has_static_banner: true +has_animated_banner: true +license_type: gpl-3.0 +deploy_license_type: gpl-3.0 +dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov3/model.py b/qai_hub_models/models/yolov3/model.py new file mode 100644 index 00000000..08826e2b --- /dev/null +++ b/qai_hub_models/models/yolov3/model.py @@ -0,0 +1,134 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +from __future__ import annotations + +from importlib import reload +from typing import Optional + +import torch +import torch.nn as nn + +from qai_hub_models.models._shared.yolo.utils import detect_postprocess +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + find_replace_in_repo, +) +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.input_spec import InputSpec + +YOLOV3_SOURCE_REPOSITORY = "https://github.com/ultralytics/yolov3" +YOLOV3_SOURCE_REPO_COMMIT = "98068efebc699e7a652fb495f3e7a23bf296affd" # v8 version of YOLO v3 https://github.com/ultralytics/yolov3/tree/v8 +MODEL_ID = __name__.split(".")[-2] +DEFAULT_WEIGHTS = "yolov3-tiny.pt" +MODEL_ASSET_VERSION = 1 + + +class YoloV3(BaseModel): + """Exportable YoloV3 bounding box detector, end-to-end.""" + + def __init__(self, model: nn.Module, include_postprocessing: bool = True) -> None: + super().__init__() + self.model = model + self.include_postprocessing = include_postprocessing + + STRIDE_MULTIPLE = 32 + + @classmethod + def from_pretrained( + cls, + weights_name: Optional[str] = DEFAULT_WEIGHTS, + include_postprocessing: Optional[bool] = True, + ): + """Load YoloV3 from a weightfile created by the source YoloV3 repository.""" + checkpoint_path = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, weights_name + ).fetch() + # Load PyTorch model from disk + yolov3_model = _load_yolov3_source_model_from_weights(checkpoint_path) + return cls(yolov3_model, include_postprocessing) + + def forward(self, image: torch.tensor): + """ + Run YoloV3 on `image`, and produce a predicted set of bounding boxes and associated class probabilities. + + Parameters: + image: Pixel values pre-processed for encoder consumption. 
+ Range: float[0, 1] + 3-channel Color Space: BGR + + Returns: + If self.include_postprocessing: + boxes: torch.Tensor + Bounding box locations. Shape [batch, num preds, 4] where 4 == (left_x, top_y, right_x, bottom_y) + scores: torch.Tensor + class scores multiplied by confidence: Shape is [batch, num_preds] + class_idx: torch.Tensor + Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. + + else: + detector_output: torch.Tensor + Shape is [batch, num_preds, k] + where, k = # of classes + 5 + k is structured as follows [box_coordinates (4) , conf (1) , # of classes] + and box_coordinates are [x_center, y_center, w, h] + """ + predictions = self.model(image) + return ( + detect_postprocess(predictions[0]) + if self.include_postprocessing + else predictions + ) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + height: int = 640, + width: int = 640, + ) -> InputSpec: + """ + Returns the input specification (name -> (shape, type)). This can be + used to submit a profiling job on Qualcomm AI Hub. + """ + return {"image": ((batch_size, 3, height, width), "float32")} + + @staticmethod + def get_output_names(include_postprocessing: bool = True) -> list[str]: + if include_postprocessing: + return ["boxes", "scores", "class_idx"] + return ["detector_output"] + + +def _load_yolov3_source_model_from_weights(weights_name: str) -> torch.nn.Module: + # Load YoloV3 model from the source repository using the given weights. 
+ # Returns models.Darknet + with SourceAsRoot( + YOLOV3_SOURCE_REPOSITORY, + YOLOV3_SOURCE_REPO_COMMIT, + MODEL_ID, + MODEL_ASSET_VERSION, + ) as repo_path: + find_replace_in_repo( + repo_path, + "models.py", + "io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid", + "io = io.reshape(bs*self.na, self.ny, self.nx, self.no)\n io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid", + ) + + import models + + reload(models) + cfg = "cfg/yolov3-tiny.cfg" + img_sz = (320, 192) + # necessary imports.
`models` come from the yolov3 repo. + from models import Darknet + + model = Darknet(cfg, img_sz) + model.load_state_dict( + torch.load(weights_name, weights_only=True)["model"], strict=False + ) + + model.to("cpu").eval() + return model diff --git a/qai_hub_models/models/yolov3/perf.yaml b/qai_hub_models/models/yolov3/perf.yaml new file mode 100644 index 00000000..7c0bf6cb --- /dev/null +++ b/qai_hub_models/models/yolov3/perf.yaml @@ -0,0 +1,322 @@ +aggregated: + supported_devices: + - Snapdragon 8 Elite QRD + - Samsung Galaxy S24 + - Samsung Galaxy S24 Ultra + - Samsung Galaxy S24+ + - Samsung Galaxy S23 + - Samsung Galaxy S23 Ultra + - Samsung Galaxy S23+ + - Samsung Galaxy S22 5G + - Samsung Galaxy S22 Ultra 5G + - Samsung Galaxy S22+ 5G + - Samsung Galaxy Tab S8 + - Xiaomi 12 + - Xiaomi 12 Pro + - Samsung Galaxy S21 + - Samsung Galaxy S21 Ultra + - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD + - SA8775P ADP + - QCS8450 (Proxy) + - XR2 Gen 2 (Proxy) + - QCS8550 (Proxy) + - SA7255P ADP + - SA8255 (Proxy) + - SA8295P ADP + - SA8650 (Proxy) + supported_chipsets: + - Snapdragon® 8 Elite + - Snapdragon® 8 Gen 3 + - Snapdragon® 8 Gen 2 + - Snapdragon® 8 Gen 1 + - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core + - SA8775P + - QCS8450 Proxy + - QCS8550 Proxy + - SA7255P + - SA8255P Proxy + - SA8295P + - SA8650P Proxy +models: +- name: Yolo-v3 + universal_assets: + torchscript_onnx_tflite: mqkvko0wm + torchscript_onnx: mn4l17y0q + performance_metrics: + - torchscript_onnx_tflite: + inference_time: 24935.0 + throughput: 40.104271104872666 + estimated_peak_memory_range: + min: 180224 + max: 17352832 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 163 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 163 + job_id: jglvyw1m5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 10405.0 + throughput: 96.10764055742432 + estimated_peak_memory_range: + min: 4960256 + max: 
22416496 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: jp14nov7p + job_status: Passed + torchscript_onnx: + inference_time: 14650.0 + throughput: 68.25938566552901 + estimated_peak_memory_range: + min: 10178560 + max: 24517952 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 134 + job_id: jp8qej4op + job_status: Passed + reference_device_info: + name: Samsung Galaxy S23 + os: '13' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 2 + timestamp: '2024-12-11T22:34:49Z' + - torchscript_onnx_tflite: + inference_time: 17648.0 + throughput: 56.66364460562103 + estimated_peak_memory_range: + min: 188416 + max: 82882368 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 163 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 163 + job_id: j56y8odyp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7427.0 + throughput: 134.64386697185944 + estimated_peak_memory_range: + min: 4931584 + max: 39492208 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: jgdxd6zzp + job_status: Passed + torchscript_onnx: + inference_time: 11103.0 + throughput: 90.0657479960371 + estimated_peak_memory_range: + min: 0 + max: 65249456 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 134 + job_id: jgke269ng + job_status: Passed + reference_device_info: + name: Samsung Galaxy S24 + os: '14' + form_factor: Phone + os_name: Android + manufacturer: Samsung + chipset: Snapdragon® 8 Gen 3 + timestamp: '2024-12-11T22:34:50Z' + - torchscript_onnx_tflite: + inference_time: 15003.0 + throughput: 66.65333599946678 + estimated_peak_memory_range: + min: 155648 + 
max: 79167760 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 163 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 163 + job_id: jp3jzowng + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7731.0 + throughput: 129.34937265554262 + estimated_peak_memory_range: + min: 4927488 + max: 34655152 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: j57yeo795 + job_status: Passed + torchscript_onnx: + inference_time: 11942.0 + throughput: 83.73806732540613 + estimated_peak_memory_range: + min: 3543040 + max: 44120592 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 134 + job_id: j5q6l4mop + job_status: Passed + reference_device_info: + name: Snapdragon 8 Elite QRD + os: '15' + form_factor: Phone + os_name: Android + manufacturer: Qualcomm + chipset: Snapdragon® 8 Elite + timestamp: '2024-12-11T22:34:51Z' + - torchscript_onnx_tflite: + inference_time: 24289.0 + throughput: 41.170900407591915 + estimated_peak_memory_range: + min: 188416 + max: 15406736 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 163 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 163 + job_id: jgo2ld4kp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 9550.0 + throughput: 104.71204188481676 + estimated_peak_memory_range: + min: 4993024 + max: 6218824 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: jp4lye915 + job_status: Passed + reference_device_info: + name: QCS8550 (Proxy) + os: '12' + form_factor: Iot + os_name: Android + manufacturer: Qualcomm + chipset: QCS8550 Proxy + timestamp: '2024-12-11T22:34:40Z' + - torchscript_onnx_tflite: + inference_time: 27309.0 + throughput: 36.61796477351789 + estimated_peak_memory_range: + min: 
204800 + max: 76823536 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 163 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 163 + job_id: jg9lzox8g + job_status: Passed + torchscript_onnx_qnn: + inference_time: 12115.0 + throughput: 82.54230293025175 + estimated_peak_memory_range: + min: 4931584 + max: 35640448 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: jp0zmovn5 + job_status: Passed + reference_device_info: + name: QCS8450 (Proxy) + os: '13' + form_factor: Xr + os_name: Android + manufacturer: Qualcomm + chipset: QCS8450 Proxy + timestamp: '2024-12-11T22:34:48Z' + - torchscript_onnx_qnn: + inference_time: 9718.0 + throughput: 102.90183165260342 + estimated_peak_memory_range: + min: 4923392 + max: 4923392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 133 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 133 + job_id: jpxkl0dl5 + job_status: Passed + torchscript_onnx: + inference_time: 15732.0 + throughput: 63.56470887363336 + estimated_peak_memory_range: + min: 4939776 + max: 4939776 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 124 + layers_on_gpu: 0 + layers_on_cpu: 10 + total_layers: 134 + job_id: jglvywlm5 + job_status: Passed + reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-12-11T22:34:52Z' diff --git a/qai_hub_models/models/yolov3/requirements.txt b/qai_hub_models/models/yolov3/requirements.txt new file mode 100644 index 00000000..79ce9484 --- /dev/null +++ b/qai_hub_models/models/yolov3/requirements.txt @@ -0,0 +1,2 @@ +thop==0.1.1.post2209072238 +ultralytics==8.0.193 diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 7c60237f..5912ed35 100644 --- 
a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: Yolo-v6 universal_assets: - torchscript_onnx_tflite: mng1p7w5n - torchscript_onnx: mnjxw9lkq + torchscript_onnx_tflite: mnz1vjj6q + torchscript_onnx: mqyv3jpvq performance_metrics: - torchscript_onnx_tflite: - inference_time: 5138.0 - throughput: 194.62826002335538 + inference_time: 5255.0 + throughput: 190.29495718363464 estimated_peak_memory_range: min: 221184 - max: 16248800 + max: 16121416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jprvovleg + job_id: jgke26dwg job_status: Passed torchscript_onnx_qnn: - inference_time: 4818.0 - throughput: 207.55500207555002 + inference_time: 4813.0 + throughput: 207.7706212341575 estimated_peak_memory_range: - min: 4956160 - max: 15156368 + min: 4947968 + max: 18617912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgo2o2ldp + job_id: j5wely835 job_status: Passed torchscript_onnx: - inference_time: 6228.0 - throughput: 160.56518946692358 + inference_time: 6282.0 + throughput: 159.1849729385546 estimated_peak_memory_range: min: 40960 - max: 10583920 + max: 10035920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jpxk3k095 + job_id: j5mn09o9p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:13:09Z' + timestamp: '2024-12-11T22:34:03Z' - torchscript_onnx_tflite: - inference_time: 3593.0 - throughput: 278.31895352073474 + inference_time: 3682.0 + throughput: 271.59152634437805 estimated_peak_memory_range: min: 12288 - max: 44894864 + max: 49145328 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp2k4krmp + job_id: j5q6l4wnp job_status: Passed torchscript_onnx_qnn: - inference_time: 3466.0 - throughput: 288.51702250432777 + inference_time: 3456.0 + throughput: 289.35185185185185 estimated_peak_memory_range: min: 4931584 - max: 55086336 + max: 59243536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jpv6e6lm5 + job_id: jg9lzokwg job_status: Passed torchscript_onnx: - inference_time: 3999.0 - throughput: 250.06251562890722 + inference_time: 4129.0 + throughput: 242.18939210462582 estimated_peak_memory_range: - min: 5361664 - max: 116362592 + min: 5394432 + max: 116416000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j5mnon9qp + job_id: jgn6z1oq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:13:10Z' + timestamp: '2024-12-11T22:34:04Z' - torchscript_onnx_tflite: - inference_time: 3714.0 - throughput: 269.25148088314484 + inference_time: 3679.0 + throughput: 271.8129926610492 estimated_peak_memory_range: - min: 212992 - max: 41796240 + min: 151552 + max: 43286704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jpy1q1o4p + job_id: jglvyw7j5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2860.0 - throughput: 349.65034965034965 + inference_time: 3446.0 + throughput: 290.1915264074289 estimated_peak_memory_range: - min: 1617920 - max: 48981008 + min: 4927488 + max: 54386128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
228 - job_id: jgjvovr8g + job_id: jp14no78p job_status: Passed torchscript_onnx: - inference_time: 3611.0 - throughput: 276.93159789531984 + inference_time: 4143.0 + throughput: 241.3709872073377 estimated_peak_memory_range: - min: 0 - max: 71849360 + min: 5337088 + max: 78905056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgn6o61m5 + job_id: jprvlxo7g job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:13:11Z' + timestamp: '2024-12-11T22:34:05Z' - torchscript_onnx_tflite: - inference_time: 5049.0 - throughput: 198.05902158843335 + inference_time: 5243.0 + throughput: 190.7304978065993 estimated_peak_memory_range: min: 217088 - max: 15821160 + max: 17406728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp0zdzme5 + job_id: j56y8ov6p job_status: Passed torchscript_onnx_qnn: - inference_time: 4625.0 - throughput: 216.21621621621622 + inference_time: 4619.0 + throughput: 216.4970772894566 estimated_peak_memory_range: - min: 4947968 - max: 6204496 + min: 4997120 + max: 6174648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jped8d705 + job_id: jgdxd68rp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:12:59Z' + timestamp: '2024-12-11T22:33:54Z' - torchscript_onnx_tflite: - inference_time: 79388.0 - throughput: 12.59636217060513 + inference_time: 79390.0 + throughput: 12.596044841919637 estimated_peak_memory_range: - min: 290816 - max: 40285952 + min: 69632 + max: 40961520 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -257,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp8q6qe8p + job_id: jp3jzo83g job_status: Passed torchscript_onnx_qnn: - inference_time: 78375.0 - throughput: 12.759170653907496 + inference_time: 78372.0 + throughput: 12.759659061909867 estimated_peak_memory_range: - min: 3706880 - max: 9020576 + min: 1548288 + max: 11970416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -272,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j5we8elj5 + job_id: jg9lzok8g job_status: Passed reference_device_info: name: SA7255P ADP @@ -281,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:13:02Z' + timestamp: '2024-12-11T22:33:56Z' - torchscript_onnx_tflite: - inference_time: 5244.0 - throughput: 190.69412662090008 + inference_time: 5240.0 + throughput: 190.83969465648855 estimated_peak_memory_range: - min: 12288 - max: 11374968 + min: 245760 + max: 11399992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jgkeoe2og + job_id: jgo2ldmqp job_status: Passed torchscript_onnx_qnn: - inference_time: 4700.0 - throughput: 212.7659574468085 + inference_time: 4658.0 + throughput: 214.68441391155002 estimated_peak_memory_range: - min: 4984832 - max: 6369656 + min: 4956160 + max: 6093704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -310,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jg9lklzvg + job_id: jp14no77p job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -319,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:13:03Z' + timestamp: '2024-12-11T22:33:57Z' - torchscript_onnx_tflite: - inference_time: 7646.0 - throughput: 130.78733978550875 + inference_time: 7626.0 + throughput: 131.13034356150013 
estimated_peak_memory_range: min: 217088 - max: 28094688 + max: 32663712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j5q6z6lmp + job_id: jpv6l2ek5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7416.0 - throughput: 134.8435814455232 + inference_time: 7116.0 + throughput: 140.5283867341203 estimated_peak_memory_range: - min: 16384 - max: 5695968 + min: 724992 + max: 6541152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -348,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp1474nlp + job_id: jgdxd68zp job_status: Passed reference_device_info: name: SA8295P ADP @@ -357,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:13:04Z' + timestamp: '2024-12-11T22:33:58Z' - torchscript_onnx_tflite: - inference_time: 5162.0 - throughput: 193.72336303758235 + inference_time: 5354.0 + throughput: 186.77624206200971 estimated_peak_memory_range: - min: 233472 - max: 15900776 + min: 204800 + max: 16710920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jglvovyl5 + job_id: jgjvr3ovg job_status: Passed torchscript_onnx_qnn: - inference_time: 4652.0 - throughput: 214.96130696474634 + inference_time: 4676.0 + throughput: 213.85799828913602 estimated_peak_memory_range: - min: 4993024 - max: 6457072 + min: 4976640 + max: 6621296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -386,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgdx8xdlp + job_id: j57yeok95 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -395,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:13:05Z' + timestamp: '2024-12-11T22:34:00Z' - torchscript_onnx_tflite: - inference_time: 8134.0 - 
throughput: 122.94074256208508 + inference_time: 8344.0 + throughput: 119.84659635666347 estimated_peak_memory_range: min: 217088 - max: 40960576 + max: 43255216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -409,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j56yry87p + job_id: jped768o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7766.0 - throughput: 128.76641771825908 + inference_time: 7774.0 + throughput: 128.63390789812195 estimated_peak_memory_range: - min: 57344 - max: 5480672 + min: 462848 + max: 6604176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -424,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j57ykyor5 + job_id: jp4lyem15 job_status: Passed reference_device_info: name: SA8775P ADP @@ -433,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:13:06Z' + timestamp: '2024-12-11T22:34:01Z' - torchscript_onnx_tflite: - inference_time: 6608.0 - throughput: 151.3317191283293 + inference_time: 6518.0 + throughput: 153.42129487572876 estimated_peak_memory_range: - min: 221184 - max: 34830080 + min: 217088 + max: 37204048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -447,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jp3jxjzzg + job_id: jgz3lz8o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6374.0 - throughput: 156.88735487919675 + inference_time: 6465.0 + throughput: 154.67904098994586 estimated_peak_memory_range: - min: 4915200 - max: 48003440 + min: 4931584 + max: 40728496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -462,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp4lmlel5 + job_id: jpxkl03l5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -471,13 +471,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:13:08Z' - - 
torchscript_onnx: + timestamp: '2024-12-11T22:34:02Z' + - torchscript_onnx_qnn: + inference_time: 4928.0 + throughput: 202.92207792207793 + estimated_peak_memory_range: + min: 4923392 + max: 4923392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 228 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 228 + job_id: j5wely8m5 + job_status: Passed + torchscript_onnx: inference_time: 6363.0 throughput: 157.15857300015716 estimated_peak_memory_range: - min: 8310784 - max: 8310784 + min: 8491008 + max: 8491008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -485,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jprvovxeg + job_id: jp2kro4qp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -494,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:13:12Z' + timestamp: '2024-12-11T22:34:06Z' diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 980e184e..da4d0e67 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -22,6 +22,7 @@ aggregated: - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) - QCS8550 (Proxy) + - SA7255P ADP - SA8255 (Proxy) - SA8295P ADP - SA8650 (Proxy) @@ -36,21 +37,22 @@ aggregated: - SA8775P - QCS8450 Proxy - QCS8550 Proxy + - SA7255P - SA8255P Proxy - SA8295P - SA8650P Proxy models: - name: Yolo-v7 universal_assets: - torchscript_onnx_tflite: mmx71vx2q - torchscript_onnx: mqex9074n + torchscript_onnx_tflite: mqyv3jj5q + torchscript_onnx: mqe7x2w4m performance_metrics: - torchscript_onnx_tflite: - inference_time: 15363.0 - throughput: 65.09145349215648 + inference_time: 15338.0 + throughput: 65.19754857217369 estimated_peak_memory_range: - min: 20480 - max: 18514304 + min: 49152 + max: 11247424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -58,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 215 - job_id: jgdx8x2lp + job_id: jgn6z1rk5 job_status: Passed torchscript_onnx_qnn: inference_time: 10640.0 throughput: 93.98496240601504 estimated_peak_memory_range: - min: 4956160 - max: 21498048 + min: 4964352 + max: 21612424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -73,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp8q6qx8p + job_id: jp3jzoq3g job_status: Passed torchscript_onnx: - inference_time: 12262.0 - throughput: 81.55276463872126 + inference_time: 12368.0 + throughput: 80.85381630012937 estimated_peak_memory_range: - min: 86016 - max: 12592640 + min: 16384 + max: 12965120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -88,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: j5we8e7j5 + job_id: jp4lye885 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -97,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:12:22Z' + timestamp: '2024-12-11T22:33:15Z' - torchscript_onnx_tflite: - inference_time: 10281.0 - throughput: 97.26680284019065 + inference_time: 10254.0 + throughput: 97.52291788570314 estimated_peak_memory_range: - min: 626688 - max: 47924176 + min: 622592 + max: 50439072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -111,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j57yky4r5 + job_id: jprvlx10g job_status: Passed torchscript_onnx_qnn: - inference_time: 7246.0 - throughput: 138.0071763731714 + inference_time: 7118.0 + throughput: 140.48890137679123 estimated_peak_memory_range: - min: 76111872 - max: 148785696 + min: 4956160 + max: 83201040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -126,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgkeoe4og + job_id: jgo2ldeqp job_status: Passed torchscript_onnx: - inference_time: 8246.0 - throughput: 121.2709192335678 + 
inference_time: 8093.0 + throughput: 123.56357345854443 estimated_peak_memory_range: - min: 6381568 - max: 132927760 + min: 6373376 + max: 132011168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -141,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jg9lklmvg + job_id: jpxkl0m35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -150,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:12:23Z' + timestamp: '2024-12-11T22:33:16Z' - torchscript_onnx_tflite: - inference_time: 12198.0 - throughput: 81.98065256599442 + inference_time: 10690.0 + throughput: 93.54536950420955 estimated_peak_memory_range: min: 614400 - max: 73267216 + max: 43760112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -164,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j5q67vw4p + job_id: jp2kro3rp job_status: Passed torchscript_onnx_qnn: - inference_time: 7087.0 - throughput: 141.10342881332016 + inference_time: 6061.0 + throughput: 164.98927569707968 estimated_peak_memory_range: - min: 5079040 - max: 74513696 + min: 4927488 + max: 73504112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -179,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5q6z6ymp + job_id: jpv6l24k5 job_status: Passed torchscript_onnx: - inference_time: 8166.0 - throughput: 122.45897624295861 + inference_time: 8134.0 + throughput: 122.94074256208508 estimated_peak_memory_range: - min: 6389760 - max: 93169232 + min: 6455296 + max: 94225728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -194,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jp1474jlp + job_id: j5mn094dp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -203,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:12:24Z' 
+ timestamp: '2024-12-11T22:33:17Z' - torchscript_onnx_tflite: - inference_time: 15228.0 - throughput: 65.66850538481744 + inference_time: 15230.0 + throughput: 65.65988181221273 estimated_peak_memory_range: - min: 643072 - max: 18487632 + min: 634880 + max: 22075784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpxk3k495 + job_id: jpy1o8v8p job_status: Passed torchscript_onnx_qnn: - inference_time: 10389.0 - throughput: 96.25565501973242 + inference_time: 10416.0 + throughput: 96.00614439324117 estimated_peak_memory_range: - min: 4964352 - max: 6216304 + min: 4997120 + max: 6444184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jglvovxl5 + job_id: jgjvr31vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -241,21 +243,51 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:12:13Z' - - reference_device_info: + timestamp: '2024-12-11T22:33:05Z' + - torchscript_onnx_tflite: + inference_time: 108031.0 + throughput: 9.256602271570198 + estimated_peak_memory_range: + min: 712704 + max: 42882512 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 215 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 215 + job_id: jp0zmoe95 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 100670.0 + throughput: 9.933445912387008 + estimated_peak_memory_range: + min: 1568768 + max: 11936528 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 221 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 221 + job_id: jgz3lzwo5 + job_status: Passed + reference_device_info: name: SA7255P ADP os: '14' form_factor: Auto os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:12:15Z' + timestamp: '2024-12-11T22:33:08Z' - torchscript_onnx_tflite: 
- inference_time: 15315.0 - throughput: 65.2954619653934 + inference_time: 15382.0 + throughput: 65.0110518788194 estimated_peak_memory_range: - min: 765952 - max: 17236432 + min: 491520 + max: 19451120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -263,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jgn6o6nm5 + job_id: jp8qejwkp job_status: Passed torchscript_onnx_qnn: - inference_time: 10579.0 - throughput: 94.52689290103034 + inference_time: 10415.0 + throughput: 96.01536245799328 estimated_peak_memory_range: - min: 4947968 - max: 6421664 + min: 4960256 + max: 6127544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -278,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgo2o2rdp + job_id: j5welyx35 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -287,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:12:16Z' + timestamp: '2024-12-11T22:33:09Z' - torchscript_onnx_tflite: - inference_time: 19755.0 - throughput: 50.62009617818274 + inference_time: 19706.0 + throughput: 50.74596569572719 estimated_peak_memory_range: - min: 618496 - max: 42830032 + min: 647168 + max: 47038720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -301,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jprvov0eg + job_id: jgke26rwg job_status: Passed torchscript_onnx_qnn: - inference_time: 13021.0 - throughput: 76.79901697258275 + inference_time: 13216.0 + throughput: 75.66585956416465 estimated_peak_memory_range: - min: 552960 - max: 6113936 + min: 61440 + max: 5764528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -316,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpv6e6dm5 + job_id: jg9lzo8wg job_status: Passed reference_device_info: name: SA8295P ADP @@ -325,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P 
- timestamp: '2024-11-26T01:12:17Z' + timestamp: '2024-12-11T22:33:10Z' - torchscript_onnx_tflite: - inference_time: 15333.0 - throughput: 65.21880910454576 + inference_time: 15234.0 + throughput: 65.64264145989235 estimated_peak_memory_range: - min: 638976 - max: 21835840 + min: 626688 + max: 19397904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -339,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp2k4kwmp + job_id: j5q6l49np job_status: Passed torchscript_onnx_qnn: - inference_time: 10562.0 - throughput: 94.6790380609733 + inference_time: 10540.0 + throughput: 94.87666034155598 estimated_peak_memory_range: - min: 4956160 - max: 6206232 + min: 5001216 + max: 6176264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -354,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgjvov78g + job_id: jp14no38p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -363,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:12:18Z' + timestamp: '2024-12-11T22:33:11Z' - torchscript_onnx_tflite: - inference_time: 20426.0 - throughput: 48.957211397238815 + inference_time: 20500.0 + throughput: 48.78048780487805 estimated_peak_memory_range: - min: 634880 - max: 41466656 + min: 618496 + max: 43505696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -377,14 +409,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jpy1q1x4p + job_id: jglvywej5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14774.0 - throughput: 67.68647624204684 + inference_time: 14790.0 + throughput: 67.6132521974307 estimated_peak_memory_range: - min: 393216 - max: 5908080 + min: 499712 + max: 6530416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -392,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jped8dz05 + job_id: jgdxd60rp job_status: Passed reference_device_info: 
name: SA8775P ADP @@ -401,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:12:20Z' + timestamp: '2024-12-11T22:33:12Z' - torchscript_onnx_tflite: - inference_time: 17644.0 - throughput: 56.67649059170256 + inference_time: 17522.0 + throughput: 57.07111060381235 estimated_peak_memory_range: - min: 647168 - max: 48320000 + min: 0 + max: 53229984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -415,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jp0zdzje5 + job_id: j56y8oq6p job_status: Passed torchscript_onnx_qnn: - inference_time: 12655.0 - throughput: 79.02015013828526 + inference_time: 12712.0 + throughput: 78.66582756450597 estimated_peak_memory_range: - min: 4931584 - max: 65351792 + min: 4952064 + max: 68838496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -430,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgdxr28rp + job_id: j57yeo6v5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -439,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:12:08Z' + timestamp: '2024-12-11T22:33:13Z' - torchscript_onnx_qnn: - inference_time: 10994.0 - throughput: 90.95870474804438 + inference_time: 10995.0 + throughput: 90.95043201455206 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -453,14 +485,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j56yry77p + job_id: jped762o5 job_status: Passed torchscript_onnx: - inference_time: 14522.0 - throughput: 68.86103842445944 + inference_time: 14222.0 + throughput: 70.3135986499789 estimated_peak_memory_range: - min: 9789440 - max: 9789440 + min: 10723328 + max: 10723328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -468,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 222 - job_id: jgdx8x3lp + job_id: jgn6z1xk5 job_status: Passed 
reference_device_info: name: Snapdragon X Elite CRD @@ -477,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:12:25Z' + timestamp: '2024-12-11T22:33:18Z' diff --git a/qai_hub_models/models/yolov7_quantized/evaluate.py b/qai_hub_models/models/yolov7_quantized/evaluate.py index 89c1054a..ab7b686a 100644 --- a/qai_hub_models/models/yolov7_quantized/evaluate.py +++ b/qai_hub_models/models/yolov7_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=250, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/yolov7_quantized/export.py b/qai_hub_models/models/yolov7_quantized/export.py index 0cdfba71..9302b363 100644 --- a/qai_hub_models/models/yolov7_quantized/export.py +++ b/qai_hub_models/models/yolov7_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7_quantized/info.yaml b/qai_hub_models/models/yolov7_quantized/info.yaml index 8e507abe..376adc43 100644 --- a/qai_hub_models/models/yolov7_quantized/info.yaml +++ b/qai_hub_models/models/yolov7_quantized/info.yaml @@ -11,6 +11,7 @@ use_case: Object Detection tags: - real-time - quantized +imsdk_supported: true research_paper: https://arxiv.org/abs/2207.02696 research_paper_title: 'YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors' diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index e2c39e5c..871262d3 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -50,15 
+50,14 @@ aggregated: models: - name: Yolo-v7-Quantized universal_assets: - torchscript_onnx_tflite: mng1dr40n - torchscript_onnx: mn7lk478q + torchscript_onnx_tflite: mno63djgn performance_metrics: - torchscript_onnx_tflite: - inference_time: 4520.0 - throughput: 221.23893805309734 + inference_time: 4508.0 + throughput: 221.82786157941436 estimated_peak_memory_range: min: 20480 - max: 10398288 + max: 11540760 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j5mnow6dp + job_id: jpv6l3lk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5293.0 - throughput: 188.9287738522577 + inference_time: 5283.0 + throughput: 189.28639030853682 estimated_peak_memory_range: min: 12288 - max: 10677232 + max: 10759288 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jped8rko5 + job_id: jglvynyj5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:19:39Z' + timestamp: '2024-12-12T00:44:02Z' - torchscript_onnx_tflite: - inference_time: 2907.0 - throughput: 343.9972480220158 + inference_time: 2908.0 + throughput: 343.878954607978 estimated_peak_memory_range: min: 12288 - max: 42123776 + max: 45736496 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jprvo420g + job_id: jgjvrxrvg job_status: Passed torchscript_onnx_qnn: - inference_time: 3528.0 - throughput: 283.4467120181406 + inference_time: 3439.0 + throughput: 290.7822041291073 estimated_peak_memory_range: - min: 1261568 - max: 60768624 + min: 1245184 + max: 60108688 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - 
job_id: jgz38xro5 + job_id: jp3jzkz3g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T03:20:06Z' + timestamp: '2024-12-12T00:44:28Z' - torchscript_onnx_tflite: - inference_time: 2507.0 - throughput: 398.8831272437176 + inference_time: 2479.0 + throughput: 403.3884630899556 estimated_peak_memory_range: min: 8192 - max: 35526528 + max: 38242528 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp2k479rp + job_id: jped797o5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3731.0 - throughput: 268.0246582685607 + inference_time: 3735.0 + throughput: 267.7376171352075 estimated_peak_memory_range: min: 1241088 - max: 53389344 + max: 54973840 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp147de8p + job_id: jgo2lylqp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:20:34Z' + timestamp: '2024-12-12T00:44:54Z' - torchscript_onnx_tflite: - inference_time: 12195.0 - throughput: 82.00082000820008 + inference_time: 12636.0 + throughput: 79.13896802785692 estimated_peak_memory_range: min: 180224 - max: 54803536 + max: 57462576 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp0zd1295 + job_id: j5welol35 job_status: Passed torchscript_onnx_qnn: - inference_time: 14177.0 - throughput: 70.53678493334274 + inference_time: 14442.0 + throughput: 69.24248719013987 estimated_peak_memory_range: - min: 1245184 - max: 9243120 + min: 1265664 + max: 7828752 primary_compute_unit: NPU precision: int8 layer_info: 
@@ -195,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j57ykjxv5 + job_id: jped796o5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -204,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:15:37Z' + timestamp: '2024-12-12T00:45:20Z' - torchscript_onnx_tflite: - inference_time: 53225.0 - throughput: 18.788163457022076 + inference_time: 51089.0 + throughput: 19.573685137700874 estimated_peak_memory_range: - min: 11005952 - max: 40684704 + min: 32768 + max: 30450360 primary_compute_unit: GPU precision: int8 layer_info: @@ -218,7 +217,7 @@ models: layers_on_gpu: 146 layers_on_cpu: 82 total_layers: 228 - job_id: jp8q63mkp + job_id: jp14n0n8p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -227,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T03:10:45Z' + timestamp: '2024-12-12T00:40:33Z' - torchscript_onnx_tflite: - inference_time: 4484.0 - throughput: 223.01516503122212 + inference_time: 4510.0 + throughput: 221.72949002217294 estimated_peak_memory_range: - min: 40960 - max: 11769656 + min: 28672 + max: 11202360 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgkeolqwg + job_id: jgdxdwdrp job_status: Passed torchscript_onnx_qnn: - inference_time: 4388.0 - throughput: 227.89425706472196 + inference_time: 3979.0 + throughput: 251.31942699170645 estimated_peak_memory_range: - min: 1290240 - max: 2420008 + min: 1277952 + max: 2473976 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp4lmxv85 + job_id: j5weloy35 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -265,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: 
'2024-11-26T03:16:04Z' + timestamp: '2024-12-12T00:45:46Z' - torchscript_onnx_tflite: - inference_time: 19799.0 - throughput: 50.5076013940098 + inference_time: 19739.0 + throughput: 50.66112771670297 estimated_peak_memory_range: - min: 184320 - max: 33101360 + min: 180224 + max: 34701824 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j5q6z7rnp + job_id: jp4lyqy85 job_status: Passed torchscript_onnx_qnn: - inference_time: 19506.0 - throughput: 51.26627704296114 + inference_time: 19181.0 + throughput: 52.13492518638236 estimated_peak_memory_range: min: 1245184 - max: 6601888 + max: 11660960 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: j5mnow3dp + job_id: jp14n0o8p job_status: Passed reference_device_info: name: SA7255P ADP @@ -303,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:16:57Z' + timestamp: '2024-12-12T00:46:39Z' - torchscript_onnx_tflite: - inference_time: 4516.0 - throughput: 221.43489813994685 + inference_time: 4480.0 + throughput: 223.21428571428572 estimated_peak_memory_range: - min: 40960 - max: 11550176 + min: 24576 + max: 11309832 primary_compute_unit: NPU precision: int8 layer_info: @@ -317,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jglvo02j5 + job_id: jgn6z2zk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4443.0 - throughput: 225.07314877335133 + inference_time: 3981.0 + throughput: 251.19316754584275 estimated_peak_memory_range: - min: 1273856 - max: 2545208 + min: 36864 + max: 1344104 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jprvo4e0g + job_id: jgdxdw6rp job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -341,13 +340,13 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:17:25Z' + timestamp: '2024-12-12T00:47:05Z' - torchscript_onnx_tflite: - inference_time: 6117.0 - throughput: 163.47882949158085 + inference_time: 6196.0 + throughput: 161.39444803098775 estimated_peak_memory_range: - min: 172032 - max: 35926400 + min: 180224 + max: 39444992 primary_compute_unit: NPU precision: int8 layer_info: @@ -355,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: j56yr3z6p + job_id: jpy1oeo8p job_status: Passed torchscript_onnx_qnn: - inference_time: 6427.0 - throughput: 155.59358954411078 + inference_time: 6412.0 + throughput: 155.95757953836556 estimated_peak_memory_range: - min: 1286144 - max: 7231104 + min: 1294336 + max: 7224496 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpy1q468p + job_id: j57yezov5 job_status: Passed reference_device_info: name: SA8295P ADP @@ -379,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:17:52Z' + timestamp: '2024-12-12T00:47:31Z' - torchscript_onnx_tflite: - inference_time: 4511.0 - throughput: 221.68033695411216 + inference_time: 4535.0 + throughput: 220.5071664829107 estimated_peak_memory_range: - min: 20480 - max: 10854008 + min: 16384 + max: 11424136 primary_compute_unit: NPU precision: int8 layer_info: @@ -393,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jp3jx413g + job_id: jp0zmym95 job_status: Passed torchscript_onnx_qnn: - inference_time: 4560.0 - throughput: 219.2982456140351 + inference_time: 3955.0 + throughput: 252.84450063211125 estimated_peak_memory_range: - min: 1310720 - max: 2548600 + min: 1261568 + max: 2891216 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jp0zd1l95 + 
job_id: jpxklv035 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -417,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T03:18:19Z' + timestamp: '2024-12-12T00:47:57Z' - torchscript_onnx_tflite: - inference_time: 6150.0 - throughput: 162.60162601626016 + inference_time: 6168.0 + throughput: 162.12710765239947 estimated_peak_memory_range: - min: 184320 - max: 33049280 + min: 163840 + max: 37168624 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jgo2o1nqp + job_id: jp8qeoekp job_status: Passed torchscript_onnx_qnn: - inference_time: 6093.0 - throughput: 164.12276382734285 + inference_time: 5547.0 + throughput: 180.27762754642148 estimated_peak_memory_range: - min: 1245184 - max: 6942160 + min: 1249280 + max: 7282240 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jgkeol3wg + job_id: j5mn0r9dp job_status: Passed reference_device_info: name: SA8775P ADP @@ -455,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:18:45Z' + timestamp: '2024-12-12T00:48:23Z' - torchscript_onnx_tflite: - inference_time: 5162.0 - throughput: 193.72336303758235 + inference_time: 5009.0 + throughput: 199.64064683569575 estimated_peak_memory_range: - min: 192512 - max: 48279568 + min: 12288 + max: 50312048 primary_compute_unit: NPU precision: int8 layer_info: @@ -469,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jpv6e1vk5 + job_id: j5q6l8lnp job_status: Passed torchscript_onnx_qnn: - inference_time: 5034.0 - throughput: 198.64918553833928 + inference_time: 4992.0 + throughput: 200.32051282051282 estimated_peak_memory_range: - min: 1269760 - max: 61042624 + min: 1273856 + max: 62658112 primary_compute_unit: NPU precision: int8 
layer_info: @@ -484,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jglvo03j5 + job_id: jp2kr8orp job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -493,10 +492,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:19:13Z' + timestamp: '2024-12-12T00:48:49Z' - torchscript_onnx_qnn: - inference_time: 4808.0 - throughput: 207.98668885191347 + inference_time: 4300.0 + throughput: 232.5581395348837 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -507,22 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jpxk37y35 - job_status: Passed - torchscript_onnx: - inference_time: 163503.0 - throughput: 6.116095729130352 - estimated_peak_memory_range: - min: 60637184 - max: 60637184 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 415 - layers_on_gpu: 0 - layers_on_cpu: 58 - total_layers: 473 - job_id: jp147d88p + job_id: jg9lzvowg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -531,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:20:59Z' + timestamp: '2024-12-12T00:46:13Z' diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 552ae749..7a562491 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -44,15 +44,15 @@ aggregated: models: - name: YOLOv8-Detection universal_assets: - torchscript_onnx_tflite: mn1zy4wvm - torchscript_onnx: mnlvkpvom + torchscript_onnx_tflite: mm6kvrydn + torchscript_onnx: mq21408jm performance_metrics: - torchscript_onnx_tflite: - inference_time: 5168.0 - throughput: 193.4984520123839 + inference_time: 5198.0 + throughput: 192.3816852635629 estimated_peak_memory_range: min: 221184 - max: 17969656 + max: 17983288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -60,14 +60,14 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jg9lklxqg + job_id: jp14nod8p job_status: Passed torchscript_onnx_qnn: - inference_time: 5222.0 - throughput: 191.49751053236307 + inference_time: 5236.0 + throughput: 190.98548510313216 estimated_peak_memory_range: - min: 4943872 - max: 18467840 + min: 4935680 + max: 21345624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -75,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jpy1q1mrp + job_id: jp0zmo195 job_status: Passed torchscript_onnx: - inference_time: 6048.0 - throughput: 165.34391534391534 + inference_time: 6065.0 + throughput: 164.88046166529267 estimated_peak_memory_range: - min: 4964352 - max: 11809152 + min: 4968448 + max: 11563376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -90,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jped8dy75 + job_id: jgz3lzvo5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -99,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:11:35Z' + timestamp: '2024-12-11T22:32:26Z' - torchscript_onnx_tflite: inference_time: 3711.0 throughput: 269.46914578280786 estimated_peak_memory_range: min: 12288 - max: 42876240 + max: 45986224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -113,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp1474vkp + job_id: jgdxd6rrp job_status: Passed torchscript_onnx_qnn: - inference_time: 3704.0 - throughput: 269.97840172786175 + inference_time: 3707.0 + throughput: 269.75991367682764 estimated_peak_memory_range: - min: 4739072 - max: 55624976 + min: 4931584 + max: 60512608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -128,14 +128,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp0zdz625 + job_id: jp8qej3kp job_status: Passed torchscript_onnx: - inference_time: 4263.0 - 
throughput: 234.57658925639223 + inference_time: 4203.0 + throughput: 237.92529145848204 estimated_peak_memory_range: min: 5365760 - max: 122644432 + max: 121965840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -143,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jgz383nz5 + job_id: j5welym35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -152,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:11:36Z' + timestamp: '2024-12-11T22:32:27Z' - torchscript_onnx_tflite: - inference_time: 3013.0 - throughput: 331.89512114171924 + inference_time: 3017.0 + throughput: 331.45508783559825 estimated_peak_memory_range: - min: 12288 - max: 39316064 + min: 8192 + max: 42896176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -166,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgdx8xzkp + job_id: j57yeojv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3610.0 - throughput: 277.0083102493075 + inference_time: 3157.0 + throughput: 316.75641431738995 estimated_peak_memory_range: - min: 819200 - max: 47053072 + min: 0 + max: 50639872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -181,14 +181,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp8q6q1zp + job_id: jgke26lwg job_status: Passed torchscript_onnx: - inference_time: 3352.0 - throughput: 298.32935560859187 + inference_time: 4015.0 + throughput: 249.06600249066003 estimated_peak_memory_range: min: 0 - max: 72724960 + max: 73522864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -196,7 +196,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j5we8e4z5 + job_id: jg9lzo9wg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -205,13 +205,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:11:37Z' + 
timestamp: '2024-12-11T22:32:28Z' - torchscript_onnx_tflite: - inference_time: 5168.0 - throughput: 193.4984520123839 + inference_time: 5170.0 + throughput: 193.42359767891682 estimated_peak_memory_range: - min: 237568 - max: 17620312 + min: 221184 + max: 18379160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -219,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j57yky7q5 + job_id: jp4lyex85 job_status: Passed torchscript_onnx_qnn: - inference_time: 5009.0 - throughput: 199.64064683569575 + inference_time: 5034.0 + throughput: 198.64918553833928 estimated_peak_memory_range: - min: 4972544 - max: 6415776 + min: 5005312 + max: 6706600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -234,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jgkeoe8yg + job_id: j5q6l47np job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -243,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:11:25Z' + timestamp: '2024-12-11T22:32:16Z' - torchscript_onnx_tflite: - inference_time: 71753.0 - throughput: 13.936699510821848 + inference_time: 71860.0 + throughput: 13.915947676036739 estimated_peak_memory_range: - min: 217088 - max: 38396128 + min: 327680 + max: 39058240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -257,7 +257,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp4lml9q5 + job_id: jpxkl0735 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 71052.0 + throughput: 14.074199178066769 + estimated_peak_memory_range: + min: 1167360 + max: 11478240 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 285 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 285 + job_id: j56y8o36p job_status: Passed reference_device_info: name: SA7255P ADP @@ -266,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: 
'2024-11-26T01:11:28Z' + timestamp: '2024-12-11T22:32:19Z' - torchscript_onnx_tflite: - inference_time: 5198.0 - throughput: 192.3816852635629 + inference_time: 5175.0 + throughput: 193.23671497584542 estimated_peak_memory_range: min: 217088 - max: 17667960 + max: 14615688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -280,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jpxk3kdj5 + job_id: j5mn09wdp job_status: Passed torchscript_onnx_qnn: - inference_time: 5017.0 - throughput: 199.32230416583616 + inference_time: 5008.0 + throughput: 199.68051118210863 estimated_peak_memory_range: - min: 5013504 - max: 6337752 + min: 5001216 + max: 6340848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -295,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j56yrywvp + job_id: jp3jzo43g job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -304,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:11:29Z' + timestamp: '2024-12-11T22:32:20Z' - torchscript_onnx_tflite: - inference_time: 9933.0 - throughput: 100.67451927917044 + inference_time: 9952.0 + throughput: 100.48231511254019 estimated_peak_memory_range: - min: 237568 - max: 28293200 + min: 217088 + max: 31393744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -318,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: j5mnondyp + job_id: jgn6z19k5 job_status: Passed torchscript_onnx_qnn: - inference_time: 8772.0 - throughput: 113.99908800729594 + inference_time: 9036.0 + throughput: 110.66843736166446 estimated_peak_memory_range: - min: 53248 - max: 5999712 + min: 49152 + max: 5630704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -333,7 +348,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jp3jxj6xg + job_id: jgo2ld1qp job_status: Passed reference_device_info: name: SA8295P ADP @@ -342,13 
+357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:11:30Z' + timestamp: '2024-12-11T22:32:21Z' - torchscript_onnx_tflite: - inference_time: 5183.0 - throughput: 192.93845263360987 + inference_time: 5157.0 + throughput: 193.91118867558657 estimated_peak_memory_range: min: 217088 - max: 14703752 + max: 17621440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -356,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jgn6o6kv5 + job_id: jprvlx40g job_status: Passed torchscript_onnx_qnn: inference_time: 4997.0 throughput: 200.12007204322595 estimated_peak_memory_range: - min: 4988928 - max: 6542632 + min: 4980736 + max: 6195640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -371,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jgo2o284p + job_id: jpv6l2zk5 job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -380,13 +395,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:11:31Z' + timestamp: '2024-12-11T22:32:22Z' - torchscript_onnx_tflite: - inference_time: 8152.0 - throughput: 122.6692836113837 + inference_time: 8122.0 + throughput: 123.12238364934746 estimated_peak_memory_range: - min: 237568 - max: 38301040 + min: 217088 + max: 41827392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -394,7 +409,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jprvovwvg + job_id: jp2kro7rp + job_status: Passed + torchscript_onnx_qnn: + inference_time: 7998.0 + throughput: 125.03125781445361 + estimated_peak_memory_range: + min: 12288 + max: 5933024 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 285 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 285 + job_id: jgjvr3kvg job_status: Passed reference_device_info: name: SA8775P ADP @@ -403,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P 
- timestamp: '2024-11-26T01:11:32Z' + timestamp: '2024-12-11T22:32:24Z' - torchscript_onnx_tflite: - inference_time: 8740.0 - throughput: 114.41647597254004 + inference_time: 8794.0 + throughput: 113.71389583807141 estimated_peak_memory_range: - min: 233472 - max: 34369296 + min: 229376 + max: 40861152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -417,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jp2k4kexp + job_id: jpy1o848p job_status: Passed torchscript_onnx_qnn: - inference_time: 7575.0 - throughput: 132.01320132013203 + inference_time: 7606.0 + throughput: 131.47515119642387 estimated_peak_memory_range: min: 4931584 - max: 41099248 + max: 46477392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -432,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jgjvovq7g + job_id: jped764o5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -441,10 +471,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:11:34Z' + timestamp: '2024-12-11T22:32:25Z' - torchscript_onnx_qnn: - inference_time: 5513.0 - throughput: 181.3894431344096 + inference_time: 5495.0 + throughput: 181.98362147406732 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -455,7 +485,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j5q6z6v7p + job_id: jglvyw0j5 job_status: Passed torchscript_onnx: inference_time: 6699.0 @@ -470,7 +500,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jg9lkldqg + job_id: jp14noq8p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -479,4 +509,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:11:38Z' + timestamp: '2024-12-11T22:32:29Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/evaluate.py b/qai_hub_models/models/yolov8_det_quantized/evaluate.py index d90ddc96..2c9e0b8f 
100644 --- a/qai_hub_models/models/yolov8_det_quantized/evaluate.py +++ b/qai_hub_models/models/yolov8_det_quantized/evaluate.py @@ -25,6 +25,7 @@ def main(): model_cls=Model, default_split_size=250, supported_datasets=SUPPORTED_DATASETS, + supports_onnx=False, is_hub_quantized=True, ) args = parser.parse_args() diff --git a/qai_hub_models/models/yolov8_det_quantized/export.py b/qai_hub_models/models/yolov8_det_quantized/export.py index d7b51a09..29368766 100644 --- a/qai_hub_models/models/yolov8_det_quantized/export.py +++ b/qai_hub_models/models/yolov8_det_quantized/export.py @@ -241,7 +241,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, is_hub_quantized=True) + parser = export_parser(model_cls=Model, supports_onnx=False, is_hub_quantized=True) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det_quantized/info.yaml b/qai_hub_models/models/yolov8_det_quantized/info.yaml index 12ad9ffb..3636d2c7 100644 --- a/qai_hub_models/models/yolov8_det_quantized/info.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/info.yaml @@ -11,6 +11,7 @@ description: Ultralytics YOLOv8 is a machine learning model that predicts boundi tags: - real-time - quantized +imsdk_supported: true research_paper: https://docs.ultralytics.com/tasks/detect/ research_paper_title: 'Ultralytics YOLOv8 Docs: Object Detection' license: https://github.com/ultralytics/ultralytics/blob/main/LICENSE diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index b5ad0879..a1349ad9 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -50,15 +50,14 @@ aggregated: models: - name: YOLOv8-Detection-Quantized universal_assets: - torchscript_onnx_tflite: mq9pgwl2n - torchscript_onnx: mn1z8kvzm + torchscript_onnx_tflite: mm5edpr4m performance_metrics: - 
torchscript_onnx_tflite: - inference_time: 1917.0 - throughput: 521.6484089723526 + inference_time: 1930.0 + throughput: 518.1347150259068 estimated_peak_memory_range: min: 12288 - max: 107602344 + max: 13299080 primary_compute_unit: NPU precision: int8 layer_info: @@ -66,14 +65,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jgo2o12xp + job_id: j5welo465 job_status: Passed torchscript_onnx_qnn: - inference_time: 2315.0 - throughput: 431.9654427645788 + inference_time: 2286.0 + throughput: 437.4453193350831 estimated_peak_memory_range: - min: 12288 - max: 10469640 + min: 1249280 + max: 12330576 primary_compute_unit: NPU precision: int8 layer_info: @@ -81,7 +80,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jpv6e10k5 + job_id: jp0zmy695 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -90,13 +89,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T03:04:40Z' + timestamp: '2024-12-12T00:29:41Z' - torchscript_onnx_tflite: - inference_time: 1282.0 - throughput: 780.0312012480499 + inference_time: 1288.0 + throughput: 776.3975155279503 estimated_peak_memory_range: min: 12288 - max: 28298896 + max: 31847664 primary_compute_unit: NPU precision: int8 layer_info: @@ -104,14 +103,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jpv6e16j5 + job_id: jp14n062p job_status: Passed torchscript_onnx_qnn: - inference_time: 1528.0 - throughput: 654.4502617801047 + inference_time: 1539.0 + throughput: 649.772579597141 estimated_peak_memory_range: - min: 0 - max: 32983168 + min: 1245184 + max: 39566016 primary_compute_unit: NPU precision: int8 layer_info: @@ -119,7 +118,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jped8reo5 + job_id: jp8qeo1kp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -128,13 +127,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 3 - timestamp: '2024-11-26T03:05:06Z' + timestamp: '2024-12-12T00:30:06Z' - torchscript_onnx_tflite: - inference_time: 1247.0 - throughput: 801.924619085806 + inference_time: 1210.0 + throughput: 826.4462809917355 estimated_peak_memory_range: min: 8192 - max: 22893104 + max: 27016208 primary_compute_unit: NPU precision: int8 layer_info: @@ -142,14 +141,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jped8rd15 + job_id: jgdxdw2ep job_status: Passed torchscript_onnx_qnn: - inference_time: 1527.0 - throughput: 654.8788474132285 + inference_time: 1281.0 + throughput: 780.64012490242 estimated_peak_memory_range: - min: 0 - max: 26548000 + min: 1241088 + max: 31448208 primary_compute_unit: NPU precision: int8 layer_info: @@ -157,7 +156,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jgz38xoo5 + job_id: jgke2z8wg job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -166,13 +165,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T03:05:33Z' + timestamp: '2024-12-12T00:30:33Z' - torchscript_onnx_tflite: - inference_time: 4661.0 - throughput: 214.5462347135808 + inference_time: 4618.0 + throughput: 216.54395842355999 estimated_peak_memory_range: - min: 61440 - max: 30050288 + min: 81920 + max: 34097312 primary_compute_unit: NPU precision: int8 layer_info: @@ -180,14 +179,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jp147d22p + job_id: j5welo435 job_status: Passed torchscript_onnx_qnn: - inference_time: 5951.0 - throughput: 168.03898504453034 + inference_time: 6098.0 + throughput: 163.98819285011479 estimated_peak_memory_range: - min: 1245184 - max: 9416816 + min: 0 + max: 7336720 primary_compute_unit: NPU precision: int8 layer_info: @@ -195,7 +194,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jg9lk3jwg + job_id: j5q6l8vnp job_status: Passed reference_device_info: name: RB3 Gen 2 
(Proxy) @@ -204,13 +203,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS6490 Proxy - timestamp: '2024-11-26T03:00:38Z' + timestamp: '2024-12-12T00:30:58Z' - torchscript_onnx_tflite: - inference_time: 46671.0 - throughput: 21.42658181740267 + inference_time: 46544.0 + throughput: 21.485046407700242 estimated_peak_memory_range: - min: 2752512 - max: 25074184 + min: 2719744 + max: 11329664 primary_compute_unit: NPU precision: int8 layer_info: @@ -218,7 +217,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 0 total_layers: 280 - job_id: jg9lk30wg + job_id: jg9lzvdwg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -227,13 +226,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8250 Proxy - timestamp: '2024-11-26T02:55:41Z' + timestamp: '2024-12-12T00:26:12Z' - torchscript_onnx_tflite: - inference_time: 1910.0 - throughput: 523.5602094240837 + inference_time: 1918.0 + throughput: 521.376433785193 estimated_peak_memory_range: - min: 12288 - max: 11115240 + min: 16384 + max: 41891536 primary_compute_unit: NPU precision: int8 layer_info: @@ -241,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: j57ykj2v5 + job_id: jp14n068p job_status: Passed torchscript_onnx_qnn: - inference_time: 2014.0 - throughput: 496.52432969215494 + inference_time: 2070.0 + throughput: 483.09178743961354 estimated_peak_memory_range: - min: 1265664 - max: 2642200 + min: 1310720 + max: 2571232 primary_compute_unit: NPU precision: int8 layer_info: @@ -256,7 +255,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jgdx8rerp + job_id: jglvynlj5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -265,13 +264,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T03:01:06Z' + timestamp: '2024-12-12T00:31:24Z' - torchscript_onnx_tflite: - inference_time: 11803.0 - throughput: 84.72422265525714 + inference_time: 11779.0 + throughput: 
84.89685032685287 estimated_peak_memory_range: - min: 81920 - max: 22424288 + min: 86016 + max: 26401392 primary_compute_unit: NPU precision: int8 layer_info: @@ -279,14 +278,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jp4lmxn85 + job_id: jgdxdw2rp job_status: Passed torchscript_onnx_qnn: - inference_time: 12102.0 - throughput: 82.63097008758884 + inference_time: 12097.0 + throughput: 82.66512358435976 estimated_peak_memory_range: - min: 1290240 - max: 8259440 + min: 1277952 + max: 11647392 primary_compute_unit: NPU precision: int8 layer_info: @@ -294,7 +293,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: j5mnowqdp + job_id: jpv6l3dk5 job_status: Passed reference_device_info: name: SA7255P ADP @@ -303,13 +302,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T03:02:00Z' + timestamp: '2024-12-12T00:32:17Z' - torchscript_onnx_tflite: - inference_time: 1931.0 - throughput: 517.8663904712585 + inference_time: 1920.0 + throughput: 520.8333333333334 estimated_peak_memory_range: - min: 12288 - max: 9476472 + min: 16384 + max: 11477240 primary_compute_unit: NPU precision: int8 layer_info: @@ -317,14 +316,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: j5mnowedp + job_id: j57yez9v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2007.0 - throughput: 498.2561036372696 + inference_time: 2063.0 + throughput: 484.7309743092584 estimated_peak_memory_range: - min: 1261568 - max: 2600072 + min: 2330624 + max: 3587440 primary_compute_unit: NPU precision: int8 layer_info: @@ -332,7 +331,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jprvo480g + job_id: jgjvrx7vg job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -341,13 +340,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T03:02:25Z' + timestamp: '2024-12-12T00:32:43Z' - torchscript_onnx_tflite: - 
inference_time: 2828.0 - throughput: 353.6067892503536 + inference_time: 2842.0 + throughput: 351.8648838845883 estimated_peak_memory_range: - min: 65536 - max: 22585808 + min: 90112 + max: 21688784 primary_compute_unit: NPU precision: int8 layer_info: @@ -355,14 +354,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jpy1q4z8p + job_id: jpxklvx35 job_status: Passed torchscript_onnx_qnn: - inference_time: 3105.0 - throughput: 322.061191626409 + inference_time: 3088.0 + throughput: 323.8341968911917 estimated_peak_memory_range: - min: 1282048 - max: 7186320 + min: 1286144 + max: 7211296 primary_compute_unit: NPU precision: int8 layer_info: @@ -370,7 +369,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jpy1q4r8p + job_id: jg9lzvmwg job_status: Passed reference_device_info: name: SA8295P ADP @@ -379,13 +378,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T03:02:53Z' + timestamp: '2024-12-12T00:33:08Z' - torchscript_onnx_tflite: - inference_time: 1929.0 - throughput: 518.4033177812338 + inference_time: 1915.0 + throughput: 522.1932114882507 estimated_peak_memory_range: - min: 12288 - max: 6167544 + min: 16384 + max: 54341104 primary_compute_unit: NPU precision: int8 layer_info: @@ -393,14 +392,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jp0zd1495 + job_id: j5mn0r8dp job_status: Passed torchscript_onnx_qnn: - inference_time: 2087.0 - throughput: 479.1566842357451 + inference_time: 2071.0 + throughput: 482.8585224529213 estimated_peak_memory_range: - min: 1257472 - max: 2535944 + min: 1249280 + max: 2669624 primary_compute_unit: NPU precision: int8 layer_info: @@ -408,7 +407,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jp0zd1395 + job_id: jp14n0j8p job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -417,13 +416,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - 
timestamp: '2024-11-26T03:03:20Z' + timestamp: '2024-12-12T00:33:35Z' - torchscript_onnx_tflite: - inference_time: 2876.0 - throughput: 347.70514603616135 + inference_time: 2881.0 + throughput: 347.1017007983339 estimated_peak_memory_range: - min: 86016 - max: 23320912 + min: 65536 + max: 27370256 primary_compute_unit: NPU precision: int8 layer_info: @@ -431,14 +430,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: j5q6z70np + job_id: jgn6z2kk5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3089.0 - throughput: 323.72936225315635 + inference_time: 3102.0 + throughput: 322.3726627981947 estimated_peak_memory_range: min: 1245184 - max: 6957744 + max: 7200464 primary_compute_unit: NPU precision: int8 layer_info: @@ -446,7 +445,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jp8q630kp + job_id: jgdxdw3rp job_status: Passed reference_device_info: name: SA8775P ADP @@ -455,13 +454,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T03:03:47Z' + timestamp: '2024-12-12T00:34:01Z' - torchscript_onnx_tflite: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 2139.0 + throughput: 467.50818139317437 estimated_peak_memory_range: min: 12288 - max: 30395728 + max: 33850512 primary_compute_unit: NPU precision: int8 layer_info: @@ -469,14 +468,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 280 - job_id: jgo2o1zqp + job_id: jprvlkw0g job_status: Passed torchscript_onnx_qnn: - inference_time: 2530.0 - throughput: 395.25691699604744 + inference_time: 2519.0 + throughput: 396.9829297340214 estimated_peak_memory_range: min: 1245184 - max: 37102928 + max: 41081328 primary_compute_unit: NPU precision: int8 layer_info: @@ -484,7 +483,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jgkeol7wg + job_id: jpxklv435 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -493,10 +492,10 @@ models: os_name: 
Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T03:04:13Z' + timestamp: '2024-12-12T00:34:28Z' - torchscript_onnx_qnn: - inference_time: 2285.0 - throughput: 437.636761487965 + inference_time: 2323.0 + throughput: 430.4778303917348 estimated_peak_memory_range: min: 1232896 max: 1232896 @@ -507,22 +506,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 273 - job_id: jpxk37n35 - job_status: Passed - torchscript_onnx: - inference_time: 67321.0 - throughput: 14.854205968419958 - estimated_peak_memory_range: - min: 26484736 - max: 26484736 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 489 - layers_on_gpu: 0 - layers_on_cpu: 63 - total_layers: 552 - job_id: jgjvo02vg + job_id: jp3jzk63g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -531,4 +515,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T03:05:59Z' + timestamp: '2024-12-12T00:31:51Z' diff --git a/qai_hub_models/models/yolov8_seg/__init__.py b/qai_hub_models/models/yolov8_seg/__init__.py index 54ba0b12..24c3ed06 100644 --- a/qai_hub_models/models/yolov8_seg/__init__.py +++ b/qai_hub_models/models/yolov8_seg/__init__.py @@ -2,6 +2,9 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -from .app import YoloV8SegmentationApp as App # noqa: F401 +from qai_hub_models.models._shared.yolo.app import ( # noqa: F401 + YoloSegmentationApp as App, +) + from .model import MODEL_ID # noqa: F401 from .model import YoloV8Segmentor as Model # noqa: F401 diff --git a/qai_hub_models/models/yolov8_seg/app.py b/qai_hub_models/models/yolov8_seg/app.py deleted file mode 100644 index d314b383..00000000 --- a/qai_hub_models/models/yolov8_seg/app.py +++ /dev/null @@ -1,203 +0,0 @@ -# --------------------------------------------------------------------- -# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. -# SPDX-License-Identifier: BSD-3-Clause -# --------------------------------------------------------------------- -from __future__ import annotations - -from collections.abc import Callable - -import numpy as np -import torch -import torch.nn.functional as F -from PIL import Image -from torchvision.transforms import Resize -from ultralytics.utils.ops import process_mask - -from qai_hub_models.utils.bounding_box_processing import batched_nms -from qai_hub_models.utils.draw import create_color_map -from qai_hub_models.utils.image_processing import app_to_net_image_inputs - - -class YoloV8SegmentationApp: - """ - This class consists of light-weight "app code" that is required to perform end to end inference - with YoloV8 segmentation model. 
- - For a given image input, the app will: - * pre-process the image (convert to range[0, 1]) - * Run Yolo inference - * By default, - - post-processes output using non-maximum-suppression - - applies predicted mask on input image - """ - - def __init__( - self, - model: Callable[ - [torch.Tensor], - tuple[ - list[torch.Tensor], - list[torch.Tensor], - list[torch.Tensor], - list[torch.Tensor], - torch.Tensor, - ], - ], - nms_score_threshold: float = 0.45, - nms_iou_threshold: float = 0.7, - input_height: int = 640, - input_width: int = 640, - ): - """ - Initialize a YoloV8SegmentationApp application. - - Parameters: - model: torch.Tensor - YoloV8 segmentation model. - - Inputs: - Tensor of shape (N H W C x float32) with range [0, 1] and BGR channel layout. - - Outputs: - boxes: torch.Tensor - Bounding box locations. Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) - scores: torch.Tensor - Class scores multiplied by confidence: Shape is [batch, num_preds] - masks: torch.Tensor - Predicted masks: Shape is [batch, num_preds, 32] - classes: torch.Tensor - Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. - protos: torch.Tensor - Tensor of shape[batch, 32, mask_h, mask_w] - Multiply masks and protos to generate output masks. - - nms_score_threshold - Score threshold for non maximum suppression. - - nms_iou_threshold - Intersection over Union threshold for non maximum suppression. - """ - self.model = model - self.nms_score_threshold = nms_score_threshold - self.nms_iou_threshold = nms_iou_threshold - self.input_height = input_height - self.input_width = input_width - - def check_image_size(self, pixel_values: torch.Tensor) -> None: - """ - Verify image size is valid model input. 
- """ - return all([s % 32 == 0 for s in pixel_values.shape[-2:]]) - - def preprocess_input(self, pixel_values: torch.Tensor) -> torch.Tensor: - img_size = (self.input_height, self.input_width) - return Resize(img_size)(pixel_values) - - def predict(self, *args, **kwargs): - # See predict_boxes_from_image. - return self.predict_segmentation_from_image(*args, **kwargs) - - def predict_segmentation_from_image( - self, - pixel_values_or_image: torch.Tensor | np.ndarray | Image | list[Image], - raw_output: bool = False, - ) -> tuple[ - list[torch.Tensor], list[torch.Tensor], list[torch.Tensor], list[torch.Tensor] - ] | list[Image.Image]: - """ - From the provided image or tensor, predict the bounding boxes & classes of objects detected within. - - Parameters: - pixel_values_or_image: torch.Tensor - PIL image - or - numpy array (N H W C x uint8) or (H W C x uint8) -- both BGR channel layout - or - pyTorch tensor (N C H W x fp32, value range is [0, 1]), BGR channel layout - - raw_output: bool - See "returns" doc section for details. - - Returns: - If raw_output is false or pixel_values_or_image is not a PIL image, returns: - pred_boxes: list[torch.Tensor] - List of predicted boxes for all the batches. - Each pred_box is of shape [num_boxes, 4] - pred_scores: list[torch.Tensor] - List of scores for each predicted box for all the batches. - Each pred_score is of shape [num_boxes] - pred_masks: list[torch.Tensor] - List of predicted masks for all the batches. - Each pred_mask is of shape [num_boxes, 32] - pred_classes: list[torch.Tensor] - List of predicted class for all the batches. 
- Each pred_class is of shape [num_boxes] - - Otherwise, returns: - image_with_masks: list[PIL.Image] - Input image with predicted masks applied - """ - - # Input Prep - NHWC_int_numpy_frames, NCHW_fp32_torch_frames = app_to_net_image_inputs( - pixel_values_or_image - ) - - # Cache input spatial dimension to use for post-processing - input_h, input_w = NCHW_fp32_torch_frames.shape[2:] - NCHW_fp32_torch_frames = self.preprocess_input(NCHW_fp32_torch_frames) - - self.check_image_size(NCHW_fp32_torch_frames) - - # Run prediction - pred_boxes, pred_scores, pred_masks, pred_class_idx, proto = self.model( - NCHW_fp32_torch_frames - ) - - # Non Maximum Suppression on each batch - pred_boxes, pred_scores, pred_class_idx, pred_masks = batched_nms( - self.nms_iou_threshold, - self.nms_score_threshold, - pred_boxes, - pred_scores, - pred_class_idx, - pred_masks, - ) - - # Process mask and upsample to input shape - for batch_idx in range(len(pred_masks)): - pred_masks[batch_idx] = process_mask( - proto[batch_idx], - pred_masks[batch_idx], - pred_boxes[batch_idx], - (self.input_height, self.input_width), - upsample=True, - ).numpy() - - # Resize masks to match with input image shape - pred_masks = F.interpolate( - input=torch.Tensor(pred_masks), - size=(input_h, input_w), - mode="bilinear", - align_corners=False, - ) - - # Return raw output if requested - if raw_output or isinstance(pixel_values_or_image, torch.Tensor): - return (pred_boxes, pred_scores, pred_masks, pred_class_idx) - - # Create color map and convert segmentation mask to RGB image - pred_mask_img = torch.argmax(pred_masks, 1) - - # Overlay the segmentation masks on the image. 
- color_map = create_color_map(pred_mask_img.max().item() + 1) - out = [] - for i, img_tensor in enumerate(NHWC_int_numpy_frames): - out.append( - Image.blend( - Image.fromarray(img_tensor), - Image.fromarray(color_map[pred_mask_img[i]]), - alpha=0.5, - ) - ) - return out diff --git a/qai_hub_models/models/yolov8_seg/demo.py b/qai_hub_models/models/yolov8_seg/demo.py index b2278023..79c67656 100644 --- a/qai_hub_models/models/yolov8_seg/demo.py +++ b/qai_hub_models/models/yolov8_seg/demo.py @@ -4,24 +4,13 @@ # --------------------------------------------------------------------- from __future__ import annotations -from qai_hub_models.models.yolov8_seg.app import YoloV8SegmentationApp +from qai_hub_models.models._shared.yolo.demo import yolo_segmentation_demo from qai_hub_models.models.yolov8_seg.model import ( - DEFAULT_WEIGHTS, MODEL_ASSET_VERSION, MODEL_ID, YoloV8Segmentor, ) -from qai_hub_models.utils.args import ( - demo_model_from_cli_args, - get_model_cli_parser, - get_on_device_demo_parser, - validate_on_device_demo_args, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.base_model import BaseModel, TargetRuntime -from qai_hub_models.utils.display import display_or_save_image - -WEIGHTS_HELP_MSG = f"YoloV8-Segment checkpoint name. 
Valid checkpoints can be found in qai_hub_models/{MODEL_ID}/model.py" +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, "test_images/bus.jpg" @@ -31,66 +20,10 @@ ) -def yolov8_seg_demo( - model_type: type[BaseModel], - default_weights: str, - weights_help_msg: str, - default_image: CachedWebModelAsset, - stride_multiple: int | None = None, - is_test: bool = False, -): - # Demo parameters - parser = get_model_cli_parser(model_type) - parser = get_on_device_demo_parser( - parser, available_target_runtimes=[TargetRuntime.TFLITE], add_output_dir=True - ) - image_help = "image file path or URL." - if stride_multiple: - image_help = f"{image_help} Image spatial dimensions (x and y) must be multiples of {stride_multiple}." - - parser.add_argument( - "--image", - type=str, - help="Test image file path or URL", - ) - parser.add_argument( - "--score-threshold", - type=float, - default=0.45, - help="Score threshold for NonMaximumSuppression", - ) - parser.add_argument( - "--iou-threshold", - type=float, - default=0.7, - help="Intersection over Union (IoU) threshold for NonMaximumSuppression", - ) - args = parser.parse_args([] if is_test else None) - validate_on_device_demo_args(args, MODEL_ID) - - if args.image is None: - image_path = default_image.fetch() - else: - image_path = args.image - - # Load image & model - model = demo_model_from_cli_args(model_type, MODEL_ID, args) - app = YoloV8SegmentationApp(model, args.score_threshold, args.iou_threshold) - - print("Model Loaded") - - image = load_image(image_path) - image_annotated = app.predict_segmentation_from_image(image)[0] - - if not is_test: - display_or_save_image(image_annotated, args.output_dir) - - def main(is_test: bool = False): - yolov8_seg_demo( + yolo_segmentation_demo( YoloV8Segmentor, - DEFAULT_WEIGHTS, - WEIGHTS_HELP_MSG, + MODEL_ID, IMAGE_ADDRESS, is_test=is_test, ) diff --git 
a/qai_hub_models/models/yolov8_seg/model.py b/qai_hub_models/models/yolov8_seg/model.py index 196e8286..6237bac7 100644 --- a/qai_hub_models/models/yolov8_seg/model.py +++ b/qai_hub_models/models/yolov8_seg/model.py @@ -8,10 +8,7 @@ import torch.nn as nn from ultralytics import YOLO as ultralytics_YOLO -from qai_hub_models.models._shared.yolo.utils import ( - get_most_likely_score, - transform_box_layout_xywh2xyxy, -) +from qai_hub_models.models._shared.yolo.model import yolo_segment_postprocess from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.input_spec import InputSpec @@ -26,6 +23,7 @@ "yolov8x-seg.pt", ] DEFAULT_WEIGHTS = "yolov8n-seg.pt" +NUM_ClASSES = 80 class YoloV8Segmentor(BaseModel): @@ -68,7 +66,9 @@ def forward(self, image: torch.Tensor): Multiply masks and protos to generate output masks. """ predictions = self.model(image) - boxes, scores, masks, classes = yolov8_segment_postprocess(predictions[0]) + boxes, scores, masks, classes = yolo_segment_postprocess( + predictions[0], NUM_ClASSES + ) return boxes, scores, masks, classes, predictions[1][-1] @staticmethod @@ -94,43 +94,3 @@ def get_channel_last_inputs() -> list[str]: @staticmethod def get_channel_last_outputs() -> list[str]: return ["masks"] - - -def yolov8_segment_postprocess(detector_output: torch.Tensor): - """ - Post processing to break YoloV8 detector output into multiple, consumable tensors (eg. for NMS). - such as bounding boxes, scores and classes. - - Parameters: - detector_output: torch.Tensor - The output of Yolo Detection model - Shape is [batch, k, num_preds] - where, k = # of classes + 4 - k is structured as follows [boxes (4) : # of classes] - and boxes are co-ordinates [x_center, y_center, w, h] - - Returns: - boxes: torch.Tensor - Bounding box locations. 
Shape is [batch, num preds, 4] where 4 == (x1, y1, x2, y2) - scores: torch.Tensor - Class scores multiplied by confidence: Shape is [batch, num_preds] - masks: torch.Tensor - Predicted masks: Shape is [batch, num_preds, 32] - class_idx: torch.Tensor - Shape is [batch, num_preds] where the last dim is the index of the most probable class of the prediction. - """ - # Break output into parts - detector_output = torch.permute(detector_output, [0, 2, 1]) - boxes_idx, num_classes = 4, 80 - masks_dim = detector_output.shape[-1] - boxes_idx - num_classes - boxes = detector_output[:, :, :4] - scores = detector_output[:, :, 4 : boxes_idx + num_classes] - masks = detector_output[:, :, -masks_dim:] - - # Convert boxes to (x1, y1, x2, y2) - boxes = transform_box_layout_xywh2xyxy(boxes) - - # Get class ID of most likely score. - scores, class_idx = get_most_likely_score(scores) - - return boxes, scores, masks, class_idx diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index ccb33723..5bf09abe 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -16,6 +16,8 @@ aggregated: - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra - Samsung Galaxy S21+ + - Snapdragon X Elite CRD + - Snapdragon X Plus 8-Core CRD - SA8775P ADP - QCS8450 (Proxy) - XR2 Gen 2 (Proxy) @@ -30,6 +32,8 @@ aggregated: - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 1 - Snapdragon® 888 + - Snapdragon® X Elite + - Snapdragon® X Plus 8-Core - SA8775P - QCS8450 Proxy - QCS8550 Proxy @@ -40,15 +44,15 @@ aggregated: models: - name: YOLOv8-Segmentation universal_assets: - torchscript_onnx_tflite: mno35wzpn - torchscript_onnx: mq8k1o6vq + torchscript_onnx_tflite: mnw8e65kn + torchscript_onnx: mnj4xoldn performance_metrics: - torchscript_onnx_tflite: - inference_time: 6517.0 - throughput: 153.44483658124904 + inference_time: 6525.0 + throughput: 153.25670498084293 estimated_peak_memory_range: - min: 4567040 - max: 22953008 + 
min: 4591616 + max: 30498216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -56,14 +60,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jgo2o2o4p + job_id: j5wely965 job_status: Passed torchscript_onnx_qnn: - inference_time: 6409.0 - throughput: 156.03058199407084 + inference_time: 6440.0 + throughput: 155.27950310559007 estimated_peak_memory_range: min: 4939776 - max: 14495328 + max: 16369872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -71,14 +75,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jp4lmlmq5 + job_id: jpxkl0135 job_status: Passed torchscript_onnx: - inference_time: 7650.0 - throughput: 130.718954248366 + inference_time: 7614.0 + throughput: 131.33701076963487 estimated_peak_memory_range: - min: 12259328 - max: 31293336 + min: 12263424 + max: 22383896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,7 +90,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jglvov1e5 + job_id: j56y8o16p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -95,13 +99,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-11-26T01:10:44Z' + timestamp: '2024-12-11T22:31:36Z' - torchscript_onnx_tflite: - inference_time: 4706.0 - throughput: 212.4946876328092 + inference_time: 4718.0 + throughput: 211.954217888936 estimated_peak_memory_range: - min: 4014080 - max: 59011152 + min: 12288 + max: 56803968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -109,14 +113,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jpv6e6e75 + job_id: jg9lzo4lg job_status: Passed torchscript_onnx_qnn: - inference_time: 4587.0 - throughput: 218.00741225201656 + inference_time: 4602.0 + throughput: 217.296827466319 estimated_peak_memory_range: min: 0 - max: 58064400 + max: 61691504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,14 +128,14 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 333 - job_id: jpxk3k3j5 + job_id: j5mn09zdp job_status: Passed torchscript_onnx: - inference_time: 5004.0 - throughput: 199.84012789768187 + inference_time: 5017.0 + throughput: 199.32230416583616 estimated_peak_memory_range: - min: 0 - max: 123435984 + min: 17895424 + max: 144590256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +143,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j56yrydvp + job_id: jp3jzom3g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -148,13 +152,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-11-26T01:10:45Z' + timestamp: '2024-12-11T22:31:37Z' - torchscript_onnx_tflite: - inference_time: 4751.0 - throughput: 210.48200378867608 + inference_time: 4509.0 + throughput: 221.77866489243735 estimated_peak_memory_range: - min: 3911680 - max: 54214848 + min: 4296704 + max: 55257792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -162,14 +166,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jgjvovo7g + job_id: jp14no82p job_status: Passed torchscript_onnx_qnn: - inference_time: 4372.0 - throughput: 228.72827081427263 + inference_time: 4375.0 + throughput: 228.57142857142858 estimated_peak_memory_range: - min: 4943872 - max: 58019776 + min: 4820992 + max: 59129440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -177,7 +181,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j5mnw83qp + job_id: jgn6z1ek5 + job_status: Passed + torchscript_onnx: + inference_time: 3969.0 + throughput: 251.95263290501387 + estimated_peak_memory_range: + min: 17457152 + max: 95961840 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 336 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 336 + job_id: jgo2ldvqp job_status: Passed reference_device_info: name: Snapdragon 8 Elite QRD @@ -186,13 +205,13 @@ models: os_name: Android 
manufacturer: Qualcomm chipset: Snapdragon® 8 Elite - timestamp: '2024-11-26T01:10:46Z' + timestamp: '2024-12-11T22:31:38Z' - torchscript_onnx_tflite: - inference_time: 6534.0 - throughput: 153.04560759106212 + inference_time: 6437.0 + throughput: 155.35187199005748 estimated_peak_memory_range: - min: 4571136 - max: 27553968 + min: 4575232 + max: 29530976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -200,14 +219,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jped8d875 + job_id: jgdxd6vep job_status: Passed torchscript_onnx_qnn: - inference_time: 6294.0 - throughput: 158.8814744200826 + inference_time: 6409.0 + throughput: 156.03058199407084 estimated_peak_memory_range: - min: 4972544 - max: 6431592 + min: 4960256 + max: 6222200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -215,7 +234,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jgn6o67v5 + job_id: jprvlxy0g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -224,13 +243,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8550 Proxy - timestamp: '2024-11-26T01:10:35Z' + timestamp: '2024-12-11T22:31:27Z' - torchscript_onnx_tflite: - inference_time: 93433.0 - throughput: 10.702856592424519 + inference_time: 93326.0 + throughput: 10.715127617169921 estimated_peak_memory_range: - min: 4628480 - max: 52342960 + min: 4775936 + max: 53326576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -238,14 +257,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jgz3838z5 + job_id: j5wely935 job_status: Passed torchscript_onnx_qnn: - inference_time: 92330.0 - throughput: 10.830715910321672 + inference_time: 92338.0 + throughput: 10.829777556368992 estimated_peak_memory_range: - min: 458752 - max: 5724752 + min: 1425408 + max: 11919456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -253,7 +272,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jp2k4kvxp + 
job_id: jpy1o8d8p job_status: Passed reference_device_info: name: SA7255P ADP @@ -262,13 +281,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA7255P - timestamp: '2024-11-26T01:10:37Z' + timestamp: '2024-12-11T22:31:29Z' - torchscript_onnx_tflite: - inference_time: 6446.0 - throughput: 155.13496742165685 + inference_time: 6479.0 + throughput: 154.3448062972681 estimated_peak_memory_range: min: 4575232 - max: 27889960 + max: 28029696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -276,14 +295,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: j5we8e8z5 + job_id: jg9lzo4wg job_status: Passed torchscript_onnx_qnn: - inference_time: 6423.0 - throughput: 155.6904873112253 + inference_time: 6295.0 + throughput: 158.85623510722795 estimated_peak_memory_range: - min: 4984832 - max: 6208304 + min: 5001216 + max: 6238960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -291,7 +310,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jpy1q17rp + job_id: jp0zmor95 job_status: Passed reference_device_info: name: SA8255 (Proxy) @@ -300,13 +319,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8255P Proxy - timestamp: '2024-11-26T01:10:38Z' + timestamp: '2024-12-11T22:31:30Z' - torchscript_onnx_tflite: - inference_time: 10912.0 - throughput: 91.64222873900293 + inference_time: 10913.0 + throughput: 91.63383121048291 estimated_peak_memory_range: min: 4567040 - max: 37259952 + max: 42533280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -314,14 +333,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jg9lklkqg + job_id: jp14no88p job_status: Passed torchscript_onnx_qnn: - inference_time: 10226.0 - throughput: 97.78994719342852 + inference_time: 10319.0 + throughput: 96.90861517588914 estimated_peak_memory_range: - min: 32768 - max: 5647200 + min: 1155072 + max: 6964032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -329,7 +348,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jp0zdzv25 + job_id: jp8qej7kp job_status: Passed reference_device_info: name: SA8295P ADP @@ -338,13 +357,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8295P - timestamp: '2024-11-26T01:10:39Z' + timestamp: '2024-12-11T22:31:32Z' - torchscript_onnx_tflite: - inference_time: 6434.0 - throughput: 155.4243083618278 + inference_time: 6445.0 + throughput: 155.1590380139643 estimated_peak_memory_range: - min: 4579328 - max: 28051464 + min: 4571136 + max: 31090416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -352,14 +371,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jp14747kp + job_id: jgdxd6vrp job_status: Passed torchscript_onnx_qnn: - inference_time: 6446.0 - throughput: 155.13496742165685 + inference_time: 6429.0 + throughput: 155.54518587649713 estimated_peak_memory_range: - min: 4984832 - max: 8285584 + min: 4980736 + max: 6888856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -367,7 +386,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jp8q6q4zp + job_id: jgke26ywg job_status: Passed reference_device_info: name: SA8650 (Proxy) @@ -376,13 +395,28 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8650P Proxy - timestamp: '2024-11-26T01:10:40Z' - - torchscript_onnx_qnn: - inference_time: 10151.0 - throughput: 98.51246182642105 + timestamp: '2024-12-11T22:31:33Z' + - torchscript_onnx_tflite: + inference_time: 10111.0 + throughput: 98.90218573830482 + estimated_peak_memory_range: + min: 4567040 + max: 54991488 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 338 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 338 + job_id: j57yeodv5 + job_status: Passed + torchscript_onnx_qnn: + inference_time: 10136.0 + throughput: 98.65824782951854 estimated_peak_memory_range: - min: 1445888 - max: 6880512 + min: 286720 + max: 6179072 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -390,7 +424,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jgkeoe9yg + job_id: j5q6l42np job_status: Passed reference_device_info: name: SA8775P ADP @@ -399,13 +433,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: SA8775P - timestamp: '2024-11-26T01:10:42Z' + timestamp: '2024-12-11T22:31:34Z' - torchscript_onnx_tflite: - inference_time: 9761.0 - throughput: 102.44851961889151 + inference_time: 9669.0 + throughput: 103.42331161443789 estimated_peak_memory_range: - min: 4567040 - max: 43984144 + min: 4595712 + max: 45791840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -413,14 +447,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: j57ykykq5 + job_id: jp4lyew85 job_status: Passed torchscript_onnx_qnn: - inference_time: 9567.0 - throughput: 104.52597470471412 + inference_time: 9583.0 + throughput: 104.35145570280706 estimated_peak_memory_range: - min: 4935680 - max: 45096320 + min: 4931584 + max: 49529424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -428,7 +462,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j5q6z6m7p + job_id: jglvywkj5 job_status: Passed reference_device_info: name: QCS8450 (Proxy) @@ -437,12 +471,42 @@ models: os_name: Android manufacturer: Qualcomm chipset: QCS8450 Proxy - timestamp: '2024-11-26T01:10:43Z' - - reference_device_info: + timestamp: '2024-12-11T22:31:35Z' + - torchscript_onnx_qnn: + inference_time: 6833.0 + throughput: 146.34860237084735 + estimated_peak_memory_range: + min: 4923392 + max: 4923392 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 333 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 333 + job_id: jp2kromrp + job_status: Passed + torchscript_onnx: + inference_time: 7696.0 + throughput: 129.93762993762994 + estimated_peak_memory_range: + min: 17530880 + max: 17530880 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 336 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 336 + job_id: jpv6l21k5 + job_status: Passed + reference_device_info: name: Snapdragon X Elite CRD os: '11' form_factor: Compute os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-11-26T01:10:47Z' + timestamp: '2024-12-11T22:31:39Z' diff --git a/qai_hub_models/models/yolov8_seg/test.py b/qai_hub_models/models/yolov8_seg/test.py index 46c927d7..6776e37b 100644 --- a/qai_hub_models/models/yolov8_seg/test.py +++ b/qai_hub_models/models/yolov8_seg/test.py @@ -7,13 +7,11 @@ import torch from ultralytics import YOLO as ultralytics_YOLO -from qai_hub_models.models.yolov8_seg.app import YoloV8SegmentationApp +from qai_hub_models.models._shared.yolo.app import YoloSegmentationApp +from qai_hub_models.models._shared.yolo.model import yolo_segment_postprocess from qai_hub_models.models.yolov8_seg.demo import IMAGE_ADDRESS, OUTPUT_IMAGE_ADDRESS from qai_hub_models.models.yolov8_seg.demo import main as demo_main -from qai_hub_models.models.yolov8_seg.model import ( - YoloV8Segmentor, - yolov8_segment_postprocess, -) +from qai_hub_models.models.yolov8_seg.model import NUM_ClASSES, YoloV8Segmentor from qai_hub_models.utils.asset_loaders import load_image from qai_hub_models.utils.image_processing import preprocess_PIL_image from qai_hub_models.utils.testing import assert_most_close @@ -25,14 +23,14 @@ def test_task(): """Verify that raw (numeric) outputs of both (QAIHM and non-qaihm) networks are the same.""" source_model = ultralytics_YOLO(WEIGHTS).model qaihm_model = YoloV8Segmentor.from_pretrained(WEIGHTS) - qaihm_app = YoloV8SegmentationApp(qaihm_model) + qaihm_app = YoloSegmentationApp(qaihm_model) processed_sample_image = preprocess_PIL_image(load_image(IMAGE_ADDRESS)) processed_sample_image = qaihm_app.preprocess_input(processed_sample_image) with torch.no_grad(): # original model output source_out = source_model(processed_sample_image) - source_out_postprocessed = 
yolov8_segment_postprocess(source_out[0]) + source_out_postprocessed = yolo_segment_postprocess(source_out[0], NUM_ClASSES) source_out = [*source_out_postprocessed, source_out[1][-1]] # Qualcomm AI Hub Model output @@ -49,7 +47,7 @@ def test_trace(): # Collect output via app for traced model img = load_image(IMAGE_ADDRESS) - app = YoloV8SegmentationApp(trace) + app = YoloSegmentationApp(trace) out_imgs = app.predict(img) expected_out = load_image(OUTPUT_IMAGE_ADDRESS) diff --git a/qai_hub_models/scorecard/device.py b/qai_hub_models/scorecard/device.py index fe1766d9..2ccf14a1 100644 --- a/qai_hub_models/scorecard/device.py +++ b/qai_hub_models/scorecard/device.py @@ -28,6 +28,15 @@ def _get_cached_device(device_name: str) -> hub.Device: class ScorecardDevice: _registry: dict[str, "ScorecardDevice"] = {} + @classmethod + def get(cls, device_name: str): + return [ + x + for x in ScorecardDevice.all_devices() + if x.reference_device_name == device_name + or x.execution_device_name == device_name + ][0] + @classmethod def all_devices( cls, diff --git a/qai_hub_models/scorecard/execution_helpers.py b/qai_hub_models/scorecard/execution_helpers.py index 080bb074..01d885e8 100644 --- a/qai_hub_models/scorecard/execution_helpers.py +++ b/qai_hub_models/scorecard/execution_helpers.py @@ -41,7 +41,7 @@ def get_compile_parameterized_pytest_config( """ Get a pytest parameterization list of all enabled (device, compile path) pairs. """ - path_list: list[ScorecardCompilePath] = ScorecardCompilePath.all_compile_paths( + path_list: list[ScorecardCompilePath] = ScorecardCompilePath.all_paths( enabled=True, supports_quantization=model_is_quantized or None ) @@ -68,7 +68,7 @@ def get_profile_parameterized_pytest_config( """ Get a pytest parameterization list of all enabled (device, profile path) pairs. 
""" - path_list: list[ScorecardProfilePath] = ScorecardProfilePath.all_profile_paths( + path_list: list[ScorecardProfilePath] = ScorecardProfilePath.all_paths( enabled=True, supports_quantization=model_is_quantized or None ) needs_fp16 = not model_is_quantized diff --git a/qai_hub_models/scorecard/path_compile.py b/qai_hub_models/scorecard/path_compile.py index c8eacfc0..b04cc768 100644 --- a/qai_hub_models/scorecard/path_compile.py +++ b/qai_hub_models/scorecard/path_compile.py @@ -33,7 +33,7 @@ def enabled(self) -> bool: ) @staticmethod - def all_compile_paths( + def all_paths( enabled: Optional[bool] = None, supports_quantization: Optional[bool] = None, ) -> list["ScorecardCompilePath"]: diff --git a/qai_hub_models/scorecard/path_profile.py b/qai_hub_models/scorecard/path_profile.py index 899c7f0e..bb8002f4 100644 --- a/qai_hub_models/scorecard/path_profile.py +++ b/qai_hub_models/scorecard/path_profile.py @@ -35,7 +35,7 @@ def enabled(self) -> bool: ) @staticmethod - def all_profile_paths( + def all_paths( enabled: Optional[bool] = None, supports_quantization: Optional[bool] = None, ) -> list["ScorecardProfilePath"]: diff --git a/qai_hub_models/scorecard/results/performance_summary.py b/qai_hub_models/scorecard/results/performance_summary.py index a078bb25..58383511 100644 --- a/qai_hub_models/scorecard/results/performance_summary.py +++ b/qai_hub_models/scorecard/results/performance_summary.py @@ -4,12 +4,9 @@ # --------------------------------------------------------------------- from __future__ import annotations -import functools -import multiprocessing import pprint from collections.abc import Iterable -from dataclasses import dataclass -from typing import Any, Union +from typing import Any, Generic, TypeVar, Union from qai_hub_models.scorecard import ( ScorecardCompilePath, @@ -23,9 +20,11 @@ ) from qai_hub_models.scorecard.results.scorecard_job import ( CompileScorecardJob, + InferenceScorecardJob, ProfileScorecardJob, + ScorecardJobTypeVar, + 
ScorecardPathTypeVar, ) -from qai_hub_models.utils.config_loaders import MODEL_IDS # Caching this information is helpful because it requires pulling data from hub. # Pulling data from hub is slow. @@ -58,21 +57,124 @@ def get_reference_device_info(device: ScorecardDevice) -> dict[str, str]: return __REFERENCE_DEVICE_INFO_PER_CHIPSET[chipset] -@dataclass -class DevicePerfSummary: - device: ScorecardDevice - run_per_path: dict[ScorecardProfilePath, ProfileScorecardJob] # Map - - @staticmethod - def from_runs(device: ScorecardDevice, path_runs: list[ProfileScorecardJob]): +class ScorecardDeviceSummary(Generic[ScorecardJobTypeVar, ScorecardPathTypeVar]): + def __init__( + self, + device: ScorecardDevice, + run_per_path: dict[ + ScorecardPathTypeVar, ScorecardJobTypeVar + ], # Map + ): + self.device = device + self.run_per_path: dict[ + ScorecardPathTypeVar, ScorecardJobTypeVar + ] = run_per_path + + @classmethod + def from_runs( + cls: type[_DeviceSummaryTypeVar], + device: ScorecardDevice, + path_runs: list[ScorecardJobTypeVar], + ): # Figure out unique devices in various baselines - run_per_path: dict[ScorecardProfilePath, ProfileScorecardJob] = {} + run_per_path: dict[ScorecardPathTypeVar, ScorecardJobTypeVar] = {} for run in path_runs: assert run._device == device # Device should match - run_per_path[run.path] = run + run_per_path[run.path] = run # type: ignore + + return cls(device, run_per_path) + + +_DeviceSummaryTypeVar = TypeVar("_DeviceSummaryTypeVar", bound=ScorecardDeviceSummary) +# Specific typevar. Autofill has trouble resolving types for nested generics without specifically listing ineritors of the generic base. 
+DeviceSummaryTypeVar = TypeVar( + "DeviceSummaryTypeVar", + "DevicePerfSummary", + "DeviceCompileSummary", + "DeviceInferenceSummary", +) + + +class ScorecardModelSummary(Generic[DeviceSummaryTypeVar, ScorecardJobTypeVar]): + device_summary_type: type[DeviceSummaryTypeVar] + + def __init__( + self, + model_id: str, + runs_per_device: dict[ScorecardDevice, DeviceSummaryTypeVar], + ): + self.model_id = model_id + self.runs_per_device: dict[ + ScorecardDevice, DeviceSummaryTypeVar + ] = runs_per_device + + @classmethod + def from_runs( + cls: type[_ModelSummaryTypeVar], + model_id: str, + path_runs: list[ScorecardJobTypeVar], + ): + runs_per_device: dict[ScorecardDevice, list[ScorecardJobTypeVar]] = {} + for run in path_runs: + assert run.model_id == model_id # model id should match + list = runs_per_device.get(run._device, []) + runs_per_device[run._device] = list + list.append(run) + + return cls( + model_id, + { + device: cls.device_summary_type.from_runs(device, runs) + for device, runs in runs_per_device.items() + }, + ) - return DevicePerfSummary(device, run_per_path) +_ModelSummaryTypeVar = TypeVar("_ModelSummaryTypeVar", bound=ScorecardModelSummary) +# Specific typevar. Autofill has trouble resolving types for nested generics without specifically listing ineritors of the generic base. 
+ModelSummaryTypeVar = TypeVar( + "ModelSummaryTypeVar", + "ModelPerfSummary", + "ModelCompileSummary", + "ModelInferenceSummary", +) + + +class ScorecardSummary(Generic[ModelSummaryTypeVar, ScorecardJobTypeVar]): + model_summary_type: type[ModelSummaryTypeVar] + + def __init__(self, runs_per_model: dict[str, ModelSummaryTypeVar]): + self.runs_per_model: dict[str, ModelSummaryTypeVar] = runs_per_model + + @classmethod + def from_runs( + cls: type[_ScorecardSummaryTypeVar], model_runs: list[ScorecardJobTypeVar] + ) -> _ScorecardSummaryTypeVar: + # Figure out unique models in various baselines + runs_per_model: dict[str, list[ScorecardJobTypeVar]] = {} + for run in model_runs: + list = runs_per_model.get(run.model_id, []) + list.append(run) + runs_per_model[run.model_id] = list + + return cls( + { + model_id: cls.model_summary_type.from_runs(model_id, runs) + for model_id, runs in runs_per_model.items() + } + ) + + +_ScorecardSummaryTypeVar = TypeVar("_ScorecardSummaryTypeVar", bound=ScorecardSummary) +# Specific typevar. Autofill has trouble resolving types for nested generics without specifically listing ineritors of the generic base. 
+ScorecardSummaryTypeVar = TypeVar( + "ScorecardSummaryTypeVar", "CompileSummary", "PerfSummary", "InferenceSummary" +) + + +class DevicePerfSummary( + ScorecardDeviceSummary[ProfileScorecardJob, ScorecardProfilePath] +): def get_perf_card( self, include_failed_jobs: bool = True, @@ -107,30 +209,8 @@ def __repr__(self) -> str: return pprint.pformat(self.get_perf_card()) -@dataclass -class ModelPerfSummary: - model_id: str - runs_per_device: dict[ - ScorecardDevice, DevicePerfSummary - ] # Map - - @staticmethod - def from_runs(model_id: str, device_runs: list[ProfileScorecardJob]): - # Figure out unique devices in various baselines - runs_per_device: dict[ScorecardDevice, list[ProfileScorecardJob]] = {} - for run in device_runs: - assert run.model_id == model_id # All should have the same model ID - list = runs_per_device.get(run._device, []) - runs_per_device[run._device] = list - list.append(run) - - return ModelPerfSummary( - model_id, - { - device: DevicePerfSummary.from_runs(device, runs) - for device, runs in runs_per_device.items() - }, - ) +class ModelPerfSummary(ScorecardModelSummary[DevicePerfSummary, ProfileScorecardJob]): + device_summary_type = DevicePerfSummary def get_universal_assets(self, exclude_paths: Iterable[ScorecardProfilePath] = []): universal_assets = {} @@ -142,7 +222,7 @@ def get_universal_assets(self, exclude_paths: Iterable[ScorecardProfilePath] = [ for runs_per_device in self.runs_per_device.values(): path_run = runs_per_device.run_per_path.get(path, None) if path_run and path_run.success: - universal_assets[path.long_name] = path_run.job.model.model_id # type: ignore + universal_assets[path.long_name] = path_run.job.model.model_id return universal_assets @@ -173,73 +253,8 @@ def __repr__(self): return pprint.pformat(self.get_perf_card()) -@dataclass -class PerfSummary: - runs_per_model: dict[str, ModelPerfSummary] # Map - - @staticmethod - def from_model_ids( - job_ids: dict[str, str], - model_ids=MODEL_IDS, - max_job_wait_secs: int | 
None = None, - ) -> dict[str, PerfSummary]: - """ - Reads jobs for every `model_id` from the dictionary and creates summaries for each. `job_ids` format: - Either: - _-_ : job_id - _- : job_id - - Returns models in this format: - model_id: list[Summary] - """ - print("Generating Performance Summary for Models") - pool = multiprocessing.Pool(processes=15) - model_summaries = pool.map( - functools.partial( - PerfSummary.from_model_id, - job_ids=job_ids, - max_job_wait_secs=max_job_wait_secs, - ), - model_ids, - ) - pool.close() - print("Finished\n") - return {k: v for k, v in model_summaries} - - @staticmethod - def from_model_id( - model_id: str, - job_ids: dict[str, str], - max_job_wait_secs: int | None = None, - ) -> tuple[str, PerfSummary]: - """ - Reads jobs for every `model_id` from the dictionary and creates summaries for each. `job_ids` format: - Either: - _-_ : job_id - _- : job_id - - Returns models in this format: - model_id: list[Summary] - """ - print(f" {model_id} ") - runs = ProfileScorecardJob.from_model_id(model_id, job_ids, max_job_wait_secs) - return model_id, PerfSummary.from_runs(runs) - - @staticmethod - def from_runs(model_runs: list[ProfileScorecardJob]): - # Figure out unique models in various baselines - runs_per_model: dict[str, list[ProfileScorecardJob]] = {} - for run in model_runs: - list = runs_per_model.get(run.model_id, []) - list.append(run) - runs_per_model[run.model_id] = list - - return PerfSummary( - { - model_id: ModelPerfSummary.from_runs(model_id, runs) - for model_id, runs in runs_per_model.items() - } - ) +class PerfSummary(ScorecardSummary[ModelPerfSummary, ProfileScorecardJob]): + model_summary_type = ModelPerfSummary def get_chipsets( self, @@ -311,114 +326,33 @@ def __repr__(self): return pprint.pformat(self.get_perf_card()) -@dataclass -class DeviceCompileSummary: - device: ScorecardDevice - run_per_path: dict[ScorecardCompilePath, CompileScorecardJob] # Map +class DeviceCompileSummary( + 
ScorecardDeviceSummary[CompileScorecardJob, ScorecardCompilePath] +): + pass - @staticmethod - def from_runs(device: ScorecardDevice, path_runs: list[CompileScorecardJob]): - # Figure out unique devices in various baselines - run_per_path: dict[ScorecardCompilePath, CompileScorecardJob] = {} - for run in path_runs: - assert run._device == device # Device should match - run_per_path[run.path] = run - return DeviceCompileSummary(device, run_per_path) +class ModelCompileSummary( + ScorecardModelSummary[DeviceCompileSummary, CompileScorecardJob] +): + device_summary_type = DeviceCompileSummary -@dataclass -class ModelCompileSummary: - model_id: str - runs_per_device: dict[ - ScorecardDevice, DeviceCompileSummary - ] # Map +class CompileSummary(ScorecardSummary[ModelCompileSummary, CompileScorecardJob]): + model_summary_type = ModelCompileSummary - @staticmethod - def from_runs(model_id: str, path_runs: list[CompileScorecardJob]): - runs_per_device: dict[ScorecardDevice, list[CompileScorecardJob]] = {} - for run in path_runs: - assert run.model_id == model_id # model id should match - list = runs_per_device.get(run._device, []) - runs_per_device[run._device] = list - list.append(run) - return ModelCompileSummary( - model_id, - { - device: DeviceCompileSummary.from_runs(device, runs) - for device, runs in runs_per_device.items() - }, - ) +class DeviceInferenceSummary( + ScorecardDeviceSummary[InferenceScorecardJob, ScorecardProfilePath] +): + pass -@dataclass -class CompileSummary: - runs_per_model: dict[str, ModelCompileSummary] # Map - - @staticmethod - def from_model_ids( - job_ids: dict[str, str], - model_ids=MODEL_IDS, - max_job_wait_secs: int | None = None, - ) -> dict[str, CompileSummary]: - """ - Reads jobs for every `model_id` from the dictionary and creates summaries for each. 
`job_ids` format: - Either: - _-_ : job_id - _- : job_id - __ : job_id - _ : job_id - - Returns models in this format: - model_id: list[Summary] - """ - print("Generating Compilation Summary for Models") - pool = multiprocessing.Pool(processes=15) - model_summaries = pool.map( - functools.partial( - CompileSummary.from_model_id, - job_ids=job_ids, - max_job_wait_secs=max_job_wait_secs, - ), - model_ids, - ) - pool.close() - print("Finished\n") - return {k: v for k, v in model_summaries} - @staticmethod - def from_model_id( - model_id: str, - job_ids: dict[str, str], - max_job_wait_secs: int | None = None, - ) -> tuple[str, CompileSummary]: - """ - Reads jobs for every `model_id` from the dictionary and creates summaries for each. `job_ids` format: - Either: - _-_ : job_id - _- : job_id - __ : job_id - _ : job_id - - Returns models in this format: - model_id: list[Summary] - """ - print(f" {model_id} ") - runs = CompileScorecardJob.from_model_id(model_id, job_ids, max_job_wait_secs) - return model_id, CompileSummary.from_runs(runs) - - @staticmethod - def from_runs(model_runs: list[CompileScorecardJob]) -> CompileSummary: - # Figure out unique models in various baselines - runs_per_model: dict[str, list[CompileScorecardJob]] = {} - for run in model_runs: - list = runs_per_model.get(run.model_id, []) - list.append(run) - runs_per_model[run.model_id] = list +class ModelInferenceSummary( + ScorecardModelSummary[DeviceInferenceSummary, InferenceScorecardJob] +): + device_summary_type = DeviceInferenceSummary - return CompileSummary( - { - model_id: ModelCompileSummary.from_runs(model_id, runs) - for model_id, runs in runs_per_model.items() - } - ) + +class InferenceSummary(ScorecardSummary[ModelInferenceSummary, InferenceScorecardJob]): + model_summary_type = ModelInferenceSummary diff --git a/qai_hub_models/scorecard/results/scorecard_job.py b/qai_hub_models/scorecard/results/scorecard_job.py index eb358844..69af22e3 100644 --- 
a/qai_hub_models/scorecard/results/scorecard_job.py +++ b/qai_hub_models/scorecard/results/scorecard_job.py @@ -2,101 +2,138 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +from __future__ import annotations + import datetime -from dataclasses import dataclass from functools import cached_property -from typing import Any, Optional, Union, cast +from typing import Any, Generic, Optional, TypeVar, Union, cast import qai_hub as hub +from qai_hub.public_rest_api import DatasetEntries from qai_hub_models.scorecard import ( ScorecardCompilePath, ScorecardDevice, ScorecardProfilePath, ) -from qai_hub_models.scorecard.execution_helpers import get_async_job_cache_name -from qai_hub_models.utils.config_loaders import QAIHMModelCodeGen, QAIHMModelInfo + +JobTypeVar = TypeVar("JobTypeVar", hub.ProfileJob, hub.InferenceJob, hub.CompileJob) +ScorecardPathTypeVar = TypeVar( + "ScorecardPathTypeVar", ScorecardCompilePath, ScorecardProfilePath +) + +# Specific typevar. Autofill has trouble resolving types for nested generics without specifically listing ineritors of the generic base. +ScorecardJobTypeVar = TypeVar( + "ScorecardJobTypeVar", + "CompileScorecardJob", + "ProfileScorecardJob", + "InferenceScorecardJob", +) -@dataclass -class ScorecardJob: - model_id: str - job_id: Optional[str] - _device: ScorecardDevice - # Setting for how the ScorecardJob class should treat a job. 
- # None | Wait an infinite amount of time the job to finish - # < 0 | Ignore job if running (treat it as skipped) - # >= 0 | Wait this many seconds for the job to finish - max_job_wait_secs: Optional[int] +class ScorecardJob(Generic[JobTypeVar, ScorecardPathTypeVar]): + job_type_class: type[JobTypeVar] + + def __init__( + self, + model_id: str, + job_id: Optional[str], + device: ScorecardDevice, + wait_for_job: bool, # If false, running jobs are treated like they were "skipped". + wait_job_secs: Optional[int], # None == any number of seconds + path: ScorecardPathTypeVar, + ): + self.model_id = model_id + self.job_id = job_id + self._device = device + self.wait_for_job = wait_for_job + self.wait_job_secs = wait_job_secs + self.path: ScorecardPathTypeVar = path + self.__post_init__() def __post_init__(self): assert self.model_id # Verify Job Exists - if self.job_id and (not self.max_job_wait_secs or self.max_job_wait_secs >= 0): + if self.job_id and not self.wait_for_job: assert self.job - @classmethod - def from_model_id( - cls: type["ScorecardJob"], model_id: str, job_ids: dict[str, str] - ) -> list: - """ - Reads jobs for `model_id` from the dictionary and creates summaries for each. `job_ids` format: - Either: - ||| : job_id - || : job_id - - Returns models in this format: - model_id: list[Summary] - """ - raise NotImplementedError() + if not self.skipped and not isinstance(self.job, self.job_type_class): + raise ValueError( + f"Job {self.job.job_id}({self.job.name}) is {type(self.job)}. Expected {self.job_type_class.__name__}" + ) @cached_property - def job(self) -> Optional[hub.Job]: - """Get the hub.CompileJob object.""" + def job(self) -> JobTypeVar: + """ + Get the AI Hub Job. + Waits for completion if necessary. 
+ """ if not self.job_id: - return None + raise ValueError("No Job ID") - job = hub.get_job(self.job_id) + job = cast(JobTypeVar, hub.get_job(self.job_id)) if not job.get_status().finished: - if self.max_job_wait_secs and self.max_job_wait_secs < 0: - return None + if not self.wait_for_job: + return job else: - job.wait(self.max_job_wait_secs) + job.wait(self.wait_job_secs) return job @cached_property def skipped(self) -> bool: - return self.job is None + # + # Running is treated as skipped. + # + # Either the class would have waited for this job already, + # or the class was told to treat running jobs like they were skipped. + # + return not self.job_id or self._job_status.running @cached_property def failed(self) -> bool: - return self._job_status and self._job_status.failure # type: ignore + return not self.skipped and self._job_status.failure @cached_property def success(self) -> bool: - return self._job_status and self._job_status.success # type: ignore + return not self.skipped and self._job_status.success @cached_property def status_message(self) -> Optional[str]: - return None if self.skipped else self._job_status.message # type: ignore + return None if self.skipped else self._job_status.message @cached_property - def _job_status(self) -> Optional[hub.JobStatus]: + def _job_status(self) -> hub.JobStatus: """Get the job status of the profile job.""" - if not self.skipped: - return self.job.get_status() # type: ignore - return None + if self.job_id: + return self.job.get_status() + raise ValueError("Can't get status without a job ID.") @cached_property def job_status(self) -> str: """Get the job status of the profile job.""" if not self.skipped: - if self._job_status.success: # type: ignore + if self._job_status.success: return "Passed" - elif self._job_status.failure: # type: ignore + elif self._job_status.failure: return "Failed" return "Skipped" + @cached_property + def device(self) -> hub.Device: + return self.job.device if not self.skipped else 
self._device.reference_device + + @cached_property + def chipset(self) -> str: + """Chipset the job was run on.""" + if self.skipped: + return self._device.chipset + + hub_device = self.device + for attr in hub_device.attributes: + if attr.startswith("chipset:"): + return attr.split(":")[1] + raise ValueError("No chipset found.") + @cached_property def quantized(self) -> str: """Quantized models are marked so precision can be correctly recorded.""" @@ -114,193 +151,29 @@ def date(self) -> Optional[datetime.datetime]: return self.job.date -@dataclass -class CompileScorecardJob(ScorecardJob): - path: ScorecardCompilePath +class CompileScorecardJob(ScorecardJob[hub.CompileJob, ScorecardCompilePath]): + job_type_class = hub.CompileJob - @classmethod - def from_model_id( - cls: type["CompileScorecardJob"], - model_id: str, - job_ids: dict[str, str], - max_job_wait_secs=None, - ) -> list["CompileScorecardJob"]: - """ - Reads jobs for `model_id` from the dictionary and creates summaries for each. 
`job_ids` format: - Either: - ||| : job_id - || : job_id - Returns models in this format: - model_id: list[Summary] - """ - model_info = QAIHMModelInfo.from_model(model_id) - model_code_gen: QAIHMModelCodeGen = model_info.code_gen_config - model_runs = [] - components = [] - - if model_code_gen.components: - if model_code_gen.default_components: - components = model_code_gen.default_components - else: - components = list(model_code_gen.components.keys()) - else: - components.append(None) # type: ignore - - path: ScorecardCompilePath - for path in ScorecardCompilePath.all_compile_paths(enabled=True): - for component in components: - model_requires_fp16 = not ( - model_code_gen.is_aimet or model_code_gen.use_hub_quantization - ) - for device in ScorecardDevice.all_devices( - enabled=True, - supports_fp16_npu=model_requires_fp16 or None, - supports_compile_path=path, - ): - model_runs.append( - cls( - model_id=component or model_info.name, - job_id=job_ids.get( - get_async_job_cache_name( - path=path, - model_id=model_id, - device=device, - component=component, - ) - ), - path=path, - _device=device, - max_job_wait_secs=max_job_wait_secs, - ) - ) - - return model_runs +class ProfileScorecardJob(ScorecardJob[hub.ProfileJob, ScorecardProfilePath]): + job_type_class = hub.ProfileJob def __post_init__(self): super().__post_init__() - if not self.skipped: - if not isinstance(self.job, hub.CompileJob): - raise ValueError(f"Job {self.job.job_id}({self.job.name}) is {type(self.job)}. 
Expected CompileJob") # type: ignore + if not self.skipped and self._job_status.success: + assert self.profile_results # Download results immediately @cached_property - def compile_job(self) -> Optional[hub.CompileJob]: - """Get the hub.CompileJob object.""" - if self.job: - return None - return cast(hub.CompileJob, self.job) - - -@dataclass -class ProfileScorecardJob(ScorecardJob): - path: ScorecardProfilePath - - @classmethod - def from_model_id( - cls: type["ProfileScorecardJob"], - model_id: str, - job_ids: dict[str, str], - max_job_wait_secs=None, - ) -> list["ProfileScorecardJob"]: - """ - Reads jobs for `model_id` from the dictionary and creates summaries for each. `job_ids` format: - Either: - ||| : job_id - || : job_id - - Returns models in this format: - model_id: list[Summary] - """ - model_info = QAIHMModelInfo.from_model(model_id) - model_code_gen: QAIHMModelCodeGen = model_info.code_gen_config - model_runs = [] - components = [] - - if model_code_gen.components: - if model_code_gen.default_components: - components = model_code_gen.default_components - else: - components = list(model_code_gen.components.keys()) - else: - components.append(None) # type: ignore - - path: ScorecardProfilePath - for path in ScorecardProfilePath.all_profile_paths(enabled=True): - for component in components: - model_requires_fp16 = not ( - model_code_gen.is_aimet or model_code_gen.use_hub_quantization - ) - for device in ScorecardDevice.all_devices( - enabled=True, - supports_fp16_npu=model_requires_fp16 or None, - supports_profile_path=path, - ): - model_runs.append( - cls( - model_id=component or model_info.name, - job_id=job_ids.get( - get_async_job_cache_name( - path=path, - model_id=model_id, - device=device, - component=component, - ), - None, - ), - _device=device, - path=path, - max_job_wait_secs=max_job_wait_secs, - ) - ) - - return model_runs - - def __post_init__(self): - super().__post_init__() - if not self.skipped: - if not isinstance(self.job, 
hub.ProfileJob): - raise ValueError(f"Job {self.job.job_id}({self.job.name}) is {type(self.job)}. Expected ProfileJob") # type: ignore - if self._job_status.success: # type: ignore - assert self.profile_results - - @cached_property - def chipset(self) -> str: - """Chipset the job was run on.""" - if not self.job: - return self._device.chipset - - hub_device = self.device - for attr in hub_device.attributes: - if attr.startswith("chipset:"): - return attr.split(":")[1] - raise ValueError("No chipset found.") - - @cached_property - def device(self) -> hub.Device: - return ( - self.job.device - if self.job and isinstance(self.job, hub.ProfileJob) - else self._device.reference_device - ) - - @cached_property - def profile_job(self) -> Optional[hub.ProfileJob]: - """Get the hub.CompileJob object.""" - if not self.job: - return None - return cast(hub.ProfileJob, self.job) - - @cached_property - def profile_results(self) -> Optional[dict[str, Any]]: + def profile_results(self) -> dict[str, Any]: """Profile results from profile job.""" - if self.job_status == "Passed": - return self.profile_job.download_profile() # type: ignore - return None + if self.success: + return self.job.download_profile() # type: ignore + raise ValueError("Can't get profile results if job did not succeed.") @cached_property def inference_time(self) -> Union[float, str]: """Get the inference time from the profile job.""" - if self.profile_results is not None: + if self.success: return float( self.profile_results["execution_summary"]["estimated_inference_time"] ) @@ -326,22 +199,22 @@ def get_layer_info(self, unit: str) -> int: return 0 @cached_property - def npu(self) -> Any: + def npu(self) -> int: """Get number of layers running on NPU.""" - return self.get_layer_info("NPU") if self.profile_results is not None else 0 + return self.get_layer_info("NPU") if self.success else 0 @cached_property - def gpu(self) -> Any: + def gpu(self) -> int: """Get number of layers running on GPU.""" - return 
self.get_layer_info("GPU") if self.profile_results is not None else 0 + return self.get_layer_info("GPU") if self.success else 0 @cached_property - def cpu(self) -> Any: + def cpu(self) -> int: """Get number of layers running on CPU.""" - return self.get_layer_info("CPU") if self.profile_results is not None else 0 + return self.get_layer_info("CPU") if self.success else 0 @cached_property - def total(self) -> Any: + def total(self) -> int: """Get the total number of layers.""" return self.npu + self.gpu + self.cpu @@ -364,7 +237,7 @@ def primary_compute_unit(self) -> str: @cached_property def peak_memory_range(self) -> dict[str, int]: """Get the estimated peak memory range.""" - if self.profile_results is not None: + if self.success: low, high = self.profile_results["execution_summary"][ "inference_memory_peak_range" ] @@ -374,7 +247,7 @@ def peak_memory_range(self) -> dict[str, int]: @cached_property def precision(self) -> str: """Get the precision of the model based on the run.""" - if self.profile_results is not None: + if self.success: compute_unit = self.primary_compute_unit if compute_unit == "CPU": return "fp32" @@ -383,16 +256,6 @@ def precision(self) -> str: return "fp16" return "null" - @cached_property - def llm_metrics(self) -> Union[dict[str, Any], str]: - """Get LLM specific metrics.""" - return "null" - - @cached_property - def evaluation_metrics(self) -> Union[dict[str, Any], str]: - """Get evaluation_metrics.""" - return "null" - @cached_property def performance_metrics(self) -> dict[str, Any]: metrics = dict( @@ -410,8 +273,20 @@ def performance_metrics(self) -> dict[str, Any]: job_id=self.job_id, job_status=self.job_status, ) - if self.llm_metrics != "null": - metrics["llm_metrics"] = self.llm_metrics - if self.evaluation_metrics != "null": - metrics["evaluation_metrics"] = self.evaluation_metrics return metrics + + +class InferenceScorecardJob(ScorecardJob[hub.InferenceJob, ScorecardProfilePath]): + job_type_class = hub.InferenceJob + + 
@property + def input_dataset(self) -> DatasetEntries: + """Input dataset.""" + return cast(DatasetEntries, self.job.inputs.download()) + + @property + def output_dataset(self) -> DatasetEntries: + """Output dataset.""" + if not self.success: + raise ValueError("Can't get output dataset if job did not succeed.") + return cast(DatasetEntries, self.job.download_output_data()) diff --git a/qai_hub_models/scorecard/results/yaml.py b/qai_hub_models/scorecard/results/yaml.py index 1984ff24..5d218cd2 100644 --- a/qai_hub_models/scorecard/results/yaml.py +++ b/qai_hub_models/scorecard/results/yaml.py @@ -4,10 +4,10 @@ # --------------------------------------------------------------------- from __future__ import annotations -import abc +import multiprocessing import os from pathlib import Path -from typing import Any, Optional +from typing import Generic, Optional, TypeVar, cast import ruamel.yaml @@ -16,23 +16,42 @@ from qai_hub_models.scorecard.execution_helpers import get_async_job_cache_name from qai_hub_models.scorecard.path_compile import ScorecardCompilePath from qai_hub_models.scorecard.path_profile import ScorecardProfilePath +from qai_hub_models.scorecard.results.performance_summary import ( + CompileSummary, + InferenceSummary, + PerfSummary, + ScorecardSummaryTypeVar, +) from qai_hub_models.scorecard.results.scorecard_job import ( CompileScorecardJob, + InferenceScorecardJob, ProfileScorecardJob, + ScorecardJobTypeVar, + ScorecardPathTypeVar, ) +from qai_hub_models.utils.config_loaders import MODEL_IDS, QAIHMModelInfo from qai_hub_models.utils.path_helpers import get_qaihm_package_root INTERMEDIATES_DIR = get_qaihm_package_root() / "scorecard" / "intermediates" COMPILE_YAML_BASE = INTERMEDIATES_DIR / "compile-jobs.yaml" PROFILE_YAML_BASE = INTERMEDIATES_DIR / "profile-jobs.yaml" +ScorecardJobYamlTypeVar = TypeVar("ScorecardJobYamlTypeVar", bound="ScorecardJobYaml") + +class ScorecardJobYaml( + Generic[ScorecardJobTypeVar, ScorecardPathTypeVar, 
ScorecardSummaryTypeVar] +): + scorecard_job_type: type[ScorecardJobTypeVar] + scorecard_path_type: type[ScorecardPathTypeVar] + scorecard_summary_type: type[ScorecardSummaryTypeVar] -class ScorecardJobYaml: def __init__(self, job_id_mapping: dict[str, str] | None = None): self.job_id_mapping = job_id_mapping or dict() @classmethod - def from_file(cls, config_path: str | Path) -> ScorecardJobYaml: + def from_file( + cls: type[ScorecardJobYamlTypeVar], config_path: str | Path + ) -> ScorecardJobYamlTypeVar: """Read yaml files.""" if not os.path.exists(config_path): raise FileNotFoundError(f"File not found with job ids at {config_path}") @@ -48,7 +67,7 @@ def to_file(self, path: str | Path) -> None: def get_job_id( self, - path: ScorecardCompilePath | ScorecardProfilePath | TargetRuntime, + path: ScorecardPathTypeVar | TargetRuntime, model_id: str, device: ScorecardDevice, component: Optional[str] = None, @@ -67,13 +86,23 @@ def get_job_id( using the provided device is not available. """ if x := self.job_id_mapping.get( - get_async_job_cache_name(path, model_id, device, component) + get_async_job_cache_name( + path, + model_id, + device, + component, + ) ): return x if fallback_to_universal_device: return self.job_id_mapping.get( - get_async_job_cache_name(path, model_id, cs_universal, component) + get_async_job_cache_name( + path, + model_id, + cs_universal, + component, + ) ) return None @@ -81,7 +110,7 @@ def get_job_id( def set_job_id( self, job_id, - path: ScorecardCompilePath | ScorecardProfilePath | TargetRuntime, + path: ScorecardPathTypeVar | TargetRuntime, model_id: str, device: ScorecardDevice, component: Optional[str] = None, @@ -110,27 +139,15 @@ def update(self, other: ScorecardJobYaml): ) self.job_id_mapping.update(other.job_id_mapping) - @abc.abstractmethod - def get_job( - self, - path: Any, - model_id: str, - device: ScorecardDevice, - component: Optional[str] = None, - ) -> CompileScorecardJob | ProfileScorecardJob: - pass - - -class 
CompileScorecardJobYaml(ScorecardJobYaml): def get_job( self, - path: ScorecardCompilePath, + path: ScorecardPathTypeVar, model_id: str, device: ScorecardDevice, component: Optional[str] = None, - ) -> CompileScorecardJob: + ) -> ScorecardJobTypeVar: """ - Get the compile scorecard job from the YAML associated with these parameters. + Get the scorecard job from the YAML associated with these parameters. parameters: path: Applicable scorecard path @@ -138,38 +155,109 @@ def get_job( device: The targeted device component: The name of the model component being tested, if applicable """ - return CompileScorecardJob( + return self.scorecard_job_type( component or model_id, self.get_job_id( path, model_id, device, component, fallback_to_universal_device=True ), device, + True, None, - path, + path, # type: ignore ) + def get_jobs_from_model_info( + self, model_info: QAIHMModelInfo + ) -> list[ScorecardJobTypeVar]: + """ + Get all jobs in this YAML related to the model information in the given model info class. 
+ """ + components: list[Optional[str]] = [] + if model_info.code_gen_config.components: + if model_info.code_gen_config.default_components: + components = cast( + list[Optional[str]], model_info.code_gen_config.default_components + ) + else: + components = list(model_info.code_gen_config.components.keys()) + else: + components.append(None) -class ProfileScorecardJobYaml(ScorecardJobYaml): - def get_job( - self, - path: ScorecardProfilePath, - model_id: str, - device: ScorecardDevice, - component: Optional[str] = None, - ) -> ProfileScorecardJob: + supports_fp16_npu = not ( + model_info.code_gen_config.is_aimet + or model_info.code_gen_config.use_hub_quantization + ) + + model_runs = [] + for path in self.scorecard_path_type.all_paths(enabled=True): + for component in components: + for device in ScorecardDevice.all_devices( + enabled=True, + supports_fp16_npu=supports_fp16_npu or None, + supports_compile_path=path + if isinstance(path, ScorecardCompilePath) + else None, + supports_profile_path=path + if isinstance(path, ScorecardProfilePath) + else None, + ): + job = self.get_job( + path, model_info.id, device, component # type: ignore + ) + if not component: + job.model_id = model_info.name + + model_runs.append(job) + + return model_runs + + def summaries_from_model_ids( + self, model_ids: list[str] = MODEL_IDS + ) -> dict[str, ScorecardSummaryTypeVar]: """ - Get the profile scorecard job from the YAML associated with these parameters. + Create a summary for each set of jobs related to each model id in the provided list. 
- parameters: - path: Applicable scorecard path - model_id: The ID of the QAIHM model being tested - device: The targeted device - component: The name of the model component being tested, if applicable + Returns models in this format: + model_id: list[Summary] """ - return ProfileScorecardJob( - component or model_id, - self.get_job_id(path, model_id, device, component), - device, - None, - path, + print(f"Generating {self.scorecard_summary_type.__name__} for Models") + pool = multiprocessing.Pool(processes=15) + model_summaries = pool.map( + self.summary_from_model_id, + model_ids, ) + pool.close() + print("Finished\n") + return {k: v for k, v in zip(model_ids, model_summaries)} + + def summary_from_model_id(self, model_id: str) -> ScorecardSummaryTypeVar: + """ + Creates a summary of all jobs related to the given model id. + """ + print(f" {model_id} ") + runs = self.get_jobs_from_model_info(QAIHMModelInfo.from_model(model_id)) + return self.scorecard_summary_type.from_runs(runs) # type: ignore + + +class CompileScorecardJobYaml( + ScorecardJobYaml[CompileScorecardJob, ScorecardCompilePath, CompileSummary] +): + scorecard_job_type = CompileScorecardJob + scorecard_path_type = ScorecardCompilePath + scorecard_summary_type = CompileSummary + + +class ProfileScorecardJobYaml( + ScorecardJobYaml[ProfileScorecardJob, ScorecardProfilePath, PerfSummary] +): + scorecard_job_type = ProfileScorecardJob + scorecard_path_type = ScorecardProfilePath + scorecard_summary_type = PerfSummary + + +class InferenceScorecardJobYaml( + ScorecardJobYaml[InferenceScorecardJob, ScorecardProfilePath, InferenceSummary] +): + scorecard_job_type = InferenceScorecardJob + scorecard_path_type = ScorecardProfilePath + scorecard_summary_type = InferenceSummary diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index 142ad563..69fb4575 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -26,11 +26,10 @@ FromPretrainedTypeVar, ) from 
qai_hub_models.utils.base_model import BaseModel, HubModel, TargetRuntime +from qai_hub_models.utils.default_export_device import DEFAULT_EXPORT_DEVICE from qai_hub_models.utils.inference import OnDeviceModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub -DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24 (Family)" - class ParseEnumAction(argparse.Action): def __init__(self, option_strings, dest, enum_type, **kwargs): @@ -96,7 +95,7 @@ def get_on_device_demo_parser( TargetRuntime.__members__.values() ), add_output_dir: bool = False, - default_device: str = "Samsung Galaxy S23", + default_device: str = DEFAULT_EXPORT_DEVICE, ): if not parser: parser = get_parser() diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index c0298a09..18e552c7 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -43,7 +43,7 @@ QAIHM_STORE_ROOT = os.environ.get("QAIHM_STORE_ROOT", os.path.expanduser("~")) LOCAL_STORE_DEFAULT_PATH = os.path.join(QAIHM_STORE_ROOT, ".qaihm") - +EXECUTING_IN_CI_ENVIRONMENT = os.getenv("QAIHM_CI", "0") == "1" SOURCE_AS_ROOT_LOCK = threading.Lock() VersionType = Union[str, int] @@ -132,6 +132,7 @@ def maybe_clone_git_repo( model_name: str, model_version: VersionType, patches: list[str] = [], + ask_to_clone: bool = not EXECUTING_IN_CI_ENVIRONMENT, ) -> Path: """Clone (or pull) a repository, save it to disk in a standard location, and return the absolute path to the cloned location. Patches can be applied @@ -147,8 +148,12 @@ def maybe_clone_git_repo( if not os.path.exists(os.path.join(local_path, ".git")): # Clone repo - should_clone = _query_yes_no( - f"{model_name} requires repository {git_file_path} . Ok to clone?", + should_clone = ( + True + if not ask_to_clone + else _query_yes_no( + f"{model_name} requires repository {git_file_path} . 
Ok to clone?", + ) ) if should_clone: print(f"Cloning {git_file_path} to {local_path}...") @@ -285,6 +290,7 @@ def SourceAsRoot( source_repo_version: int | str, source_repo_patches: list[str] = [], keep_sys_modules: bool = True, + ask_to_clone: bool = not EXECUTING_IN_CI_ENVIRONMENT, ): """ Context manager that runs code with: @@ -301,6 +307,7 @@ def SourceAsRoot( source_repo_name, source_repo_version, patches=source_repo_patches, + ask_to_clone=ask_to_clone, ) ) SOURCE_AS_ROOT_LOCK.acquire() @@ -972,7 +979,7 @@ def download_file(web_url: str, dst_path: str, num_retries: int = 4) -> str: `dst_folder` should be relative to the local cache root for qai_hub_models. """ if not os.path.exists(dst_path): - print(f"Downloading data at {web_url} to {dst_path}... ", end="") + print(f"Downloading data at {web_url} to {dst_path}") # Streaming, so we can iterate over the response. response = requests.get(web_url, stream=True) diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py index 5b22ba13..1833cf05 100644 --- a/qai_hub_models/utils/base_model.py +++ b/qai_hub_models/utils/base_model.py @@ -48,6 +48,10 @@ def __init__(self): self.get_input_spec = self._get_input_spec_for_instance if self._get_output_names_for_instance.__module__ != __name__: self.get_output_names = self._get_output_names_for_instance + if self._get_channel_last_inputs_for_instance.__module__ != __name__: + self.get_channel_last_inputs = self._get_channel_last_inputs_for_instance + if self._get_channel_last_outputs_for_instance.__module__ != __name__: + self.get_channel_last_outputs = self._get_channel_last_outputs_for_instance def _get_input_spec_for_instance(self, *args, **kwargs) -> InputSpec: """ @@ -72,6 +76,26 @@ def _get_output_names_for_instance(self, *args, **kwargs) -> list[str]: """ raise NotImplementedError + def _get_channel_last_inputs_for_instance(self, *args, **kwargs) -> list[str]: + """ + Get the channel last input names for an instance of this model. 
+ + If this function is implemented by a child class, the initializer for BaseModel + will automatically override get_channel_last_inputs with this function + when the class is instantiated. + """ + raise NotImplementedError + + def _get_channel_last_outputs_for_instance(self, *args, **kwargs) -> list[str]: + """ + Get the channel last output names for an instance of this model. + + If this function is implemented by a child class, the initializer for BaseModel + will automatically override get_channel_last_outputs with this function + when the class is instantiated. + """ + raise NotImplementedError + def sample_inputs( self, input_spec: InputSpec | None = None, diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py index 4246f00f..11388dd0 100644 --- a/qai_hub_models/utils/config_loaders.py +++ b/qai_hub_models/utils/config_loaders.py @@ -14,6 +14,7 @@ from typing import Any, Optional, TypeVar, Union, get_args, get_type_hints import requests +import yaml from qai_hub.util.session import create_session from schema import And from schema import Optional as OptionalSchema @@ -197,8 +198,120 @@ def from_yaml( ) -> BaseDataClassTypeVar: return cls.from_dict(load_yaml(path)) - def to_dict(self) -> dict[str, Any]: - return {field.name: getattr(self, field.name) for field in fields(self)} + def to_dict( + self, include_defaults: bool = True, yaml_compatible=False + ) -> dict[str, Any]: + """ + Returns this class as a python dictionary. + + parameters: + include_defaults : bool + If false, dataclass fields will not be included in the dict if set to the fields' default value. + + yaml_compatible : bool + Returns a dict in which all Python objects are converted to a YAML-serializable representation. 
+ """ + return self._complete_partial_dict( + include_defaults=include_defaults, yaml_compatible=yaml_compatible + ) + + def _complete_partial_dict( + self, + partial_yaml_dict: Optional[dict] = None, + include_defaults: bool = True, + yaml_compatible: bool = False, + ): + """ + Fills partial_yaml_dict with all fields of the dataclass that do not exist in the dict. + + parameters: + partial_yaml_dict : Optional[dict] + The dict to fill. If unset, uses a new empty dict. + + include_defaults : bool + If false, dataclass fields will not be included in the dict if set to the fields' default value. + + yaml_compatible : bool + Returns a dict in which all Python objects are converted to a YAML-serializable representation. + + discussion: + This function should be used after to_dict() processes complex values and adds them to the dict. + For example, to_dict() may fill a dict with a special string representation of enum fields. + After that, it passes that dict to this func to naively dump all other fields in the class into the dictionary. 
+ """ + + def _process_dict_field_val(field_val: dict[Any, Any]): + out_dict = {} + for k, v in field_val.items(): + out_dict[_process_field_val(k)] = _process_field_val(v) + return out_dict + + def _process_list_field_val(field_val: list[Any]): + out_list = [] + for val in field_val: + out_list.append(_process_field_val(val)) + return out_list + + def _process_tuple_field_val(field_val: tuple[Any, ...]): + return tuple(_process_list_field_val(list(field_val))) + + def _process_field_val(field_val: Any): + if isinstance(field_val, dict): + return _process_dict_field_val(field_val) + elif isinstance(field_val, list): + return _process_list_field_val(field_val) + elif isinstance(field_val, tuple): + return _process_tuple_field_val(field_val) + elif isinstance(field_val, BaseDataClass): + return field_val.to_dict(include_defaults) + elif yaml_compatible and type(field_val) not in [int, float, bool, str]: + return str(field_val) + return field_val + + fields = dataclasses.fields(self) + yaml_dict = partial_yaml_dict or {} + for field in fields: + default = field.default if field.default_factory != dataclasses._MISSING_TYPE else field.default_factory() # type: ignore + field_val = getattr(self, field.name) + if field.name not in yaml_dict and ( + include_defaults or field_val != default + ): + yaml_dict[field.name] = _process_field_val(field_val) + + return yaml_dict + + def to_yaml( + self, + path: str | Path, + write_if_empty: bool = True, + delete_if_empty: bool = True, + ) -> bool: + """ + Converts this class to a dict and saves that dict to a YAML file. + + parameters: + path : str | Path + Path to save the file. + + write_if_empty : bool + If False, the YAML file will not be written to disk if the dictionary to be saved is empty. + + delete_if_empty: bool + If True, an existing YAML file at the given path will be deleted if the dictionary to be saved is empty. 
+ + discussion: + Generally, the dictionary to be saved to YAML is empty only if: + * all dataclass fields have default values + * every field in this dataclass instance is set to its default value + """ + dict = self.to_dict(include_defaults=False, yaml_compatible=True) + if not dict and not write_if_empty: + if delete_if_empty and os.path.exists(path): + os.remove(path) + return False + with open(path, "w") as yaml_file: + yaml.dump(dict, yaml_file) + return True BaseDataClassTypeVar = TypeVar("BaseDataClassTypeVar", bound="BaseDataClass") @@ -536,16 +649,40 @@ class QAIHMModelCodeGen(BaseDataClass): # If the model doesn't work on qnn, this should explain why, # ideally with a reference to an internal issue. + # + # This field is managed automatically by the scorecard, and should + # not be manually edited after a model is first added. qnn_export_failure_reason: str = "" + # If the model should be disabled for qnn for any reason other than + # a job failure, this should explain why, + # ideally with a reference to an internal issue. + qnn_export_disable_reason: str = "" + # If the model doesn't work on tflite, this should explain why, # ideally with a reference to an internal issue. + # + # This field is managed automatically by the scorecard, and should + # not be manually edited after a model is first added. tflite_export_failure_reason: str = "" + # If the model should be disabled for tflite for any reason other than + # a job failure, this should explain why, + # ideally with a reference to an internal issue. + tflite_export_disable_reason: str = "" + # If the model doesn't work on onnx, this should explain why, # ideally with a reference to an internal issue. + # + # This field is managed automatically by the scorecard, and should + # not be manually edited after a model is first added. 
onnx_export_failure_reason: str = "" + # If the model should be disabled for onnx for any reason other than + # a job failure, this should explain why, + # ideally with a reference to an internal issue. + onnx_export_disable_reason: str = "" + # If set, changes the default device when running export.py for the model. default_device: Optional[str] = None @@ -556,7 +693,8 @@ class QAIHMModelCodeGen(BaseDataClass): # This can happen when the model outputs many low confidence values that get # filtered out in post-processing. # Omit printing PSNR in `export.py` for these to avoid confusion. - outputs_to_skip_validation: Optional[list[str]] = None + # dict + outputs_to_skip_validation: Optional[dict[int, str]] = None # Additional arguments to initialize the model when unit testing export. # This is commonly used to test a smaller variant in the unit test. @@ -619,9 +757,11 @@ class QAIHMModelCodeGen(BaseDataClass): # The model supports python versions that are at least this version. None == Any version python_version_greater_than_or_equal_to: Optional[str] = None + python_version_greater_than_or_equal_to_reason: Optional[str] = None # The model supports python versions that are less than this version. None == Any version python_version_less_than: Optional[str] = None + python_version_less_than_reason: Optional[str] = None @classmethod def from_model(cls: type[QAIHMModelCodeGen], model_id: str) -> QAIHMModelCodeGen: @@ -635,6 +775,26 @@ def validate(self) -> Optional[str]: return "Flags is_aimet and use_hub_quantization cannot both be set." if self.use_hub_quantization and not self.eval_datasets: return "Must set eval_datasets if use_hub_quantization is set." + if ( + self.python_version_greater_than_or_equal_to is None + and self.python_version_greater_than_or_equal_to_reason is not None + ): + return "python_version_greater_than_or_equal_to_reason is set, but python_version_greater_than_or_equal_to is not." 
+ if ( + self.python_version_greater_than_or_equal_to is not None + and self.python_version_greater_than_or_equal_to_reason is None + ): + return "python_version_greater_than_or_equal_to must have a reason (python_version_greater_than_or_equal_to_reason) set." + if ( + self.python_version_less_than_reason is None + and self.python_version_less_than is not None + ): + return "python_version_less_than must have a reason (python_version_less_than_reason) set." + if ( + self.python_version_less_than_reason is not None + and self.python_version_less_than is None + ): + return "python_version_less_than_reason is set, but python_version_less_than is not." return None @classmethod @@ -643,6 +803,10 @@ def from_yaml(cls: type[QAIHMModelCodeGen], path: str | Path) -> QAIHMModelCodeG return QAIHMModelCodeGen() # Default Schema return super().from_yaml(path) + def to_model_yaml(self, model_id: str): + code_gen_path = QAIHM_MODELS_ROOT / model_id / "code-gen.yaml" + self.to_yaml(code_gen_path, write_if_empty=False, delete_if_empty=True) + @dataclass class QAIHMModelInfo(BaseDataClass): @@ -729,6 +893,9 @@ class QAIHMModelInfo(BaseDataClass): # A link to the model's license. Most commonly found in the github repo it was cloned from. license: Optional[str] = None + # Whether the model is compatible with the IMSDK Plugin for IOT devices + imsdk_supported: bool = False + # A link to the AIHub license, unless the license is more restrictive like GPL. # In that case, this should point to the same as the model license. 
deploy_license: Optional[str] = None @@ -940,9 +1107,18 @@ def validate(self) -> Optional[str]: return "All public models must have an info.yaml" if ( - self.code_gen_config.tflite_export_failure_reason - and self.code_gen_config.qnn_export_failure_reason - and self.code_gen_config.onnx_export_failure_reason + ( + self.code_gen_config.tflite_export_failure_reason + or self.code_gen_config.tflite_export_disable_reason + ) + and ( + self.code_gen_config.qnn_export_failure_reason + or self.code_gen_config.qnn_export_disable_reason + ) + and ( + self.code_gen_config.onnx_export_failure_reason + or self.code_gen_config.onnx_export_disable_reason + ) ): return "Public models must support at least one export path" diff --git a/qai_hub_models/utils/default_export_device.py b/qai_hub_models/utils/default_export_device.py new file mode 100644 index 00000000..fd1c7ac0 --- /dev/null +++ b/qai_hub_models/utils/default_export_device.py @@ -0,0 +1,5 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +DEFAULT_EXPORT_DEVICE = "Samsung Galaxy S24 (Family)" diff --git a/qai_hub_models/utils/image_processing.py b/qai_hub_models/utils/image_processing.py index 5eef7776..a22c65c3 100644 --- a/qai_hub_models/utils/image_processing.py +++ b/qai_hub_models/utils/image_processing.py @@ -83,11 +83,12 @@ def app_to_net_image_inputs( return NHWC_int_numpy_frames, NCHW_fp32_torch_frames -def preprocess_PIL_image(image: Image) -> torch.Tensor: +def preprocess_PIL_image(image: Image, to_float: bool = True) -> torch.Tensor: """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW.""" transform = transforms.Compose([transforms.PILToTensor()]) # bgr image - img: torch.Tensor = transform(image) # type: ignore - img = img.float().unsqueeze(0) / 255.0 # int 0 - 255 to float 0.0 - 1.0 + img: torch.Tensor = transform(image).unsqueeze(0) # type: ignore + if to_float: + return img.float() / 255.0 # int 0 - 255 to float 0.0 - 1.0 return img @@ -100,12 +101,15 @@ def preprocess_PIL_image_mask(image_mask: Image) -> torch.Tensor: return mask -def numpy_image_to_torch(image: np.ndarray) -> torch.Tensor: +def numpy_image_to_torch(image: np.ndarray, to_float: bool = True) -> torch.Tensor: """Convert a Numpy image (dtype uint8, shape [H W C] or [N H W C]) into a pyTorch tensor with range [0, 1] and shape NCHW.""" image_torch = torch.from_numpy(image) + if len(image.shape) == 3: image_torch = image_torch.unsqueeze(0) - return image_torch.permute(0, 3, 1, 2).float() / 255.0 + if to_float: + return image_torch.permute(0, 3, 1, 2).float() / 255.0 + return image_torch.permute(0, 3, 1, 2) def torch_tensor_to_PIL_image(data: torch.Tensor) -> Image: diff --git a/qai_hub_models/utils/system_info.py b/qai_hub_models/utils/system_info.py index 50f8d531..b5d11d04 100644 --- a/qai_hub_models/utils/system_info.py +++ b/qai_hub_models/utils/system_info.py @@ -43,7 +43,7 @@ def 
has_recommended_memory(required_memory_in_gb: float) -> None: "sudo mkswap /local/mnt/swapfile # Set up a Linux swap area", "sudo swapon /local/mnt/swapfile # Turn the swap on", "", - "You can update `count` to increase swap space that works for machine." - "NOTE: above commands does not persist through reboot.", + "You can update `count` to increase swap space that works for machine.", + "NOTE: the above commands will not persist through a reboot.", ] print_with_box(warning_msgs) diff --git a/scripts/tasks/util.py b/scripts/tasks/util.py index f4d00de7..3aef684b 100644 --- a/scripts/tasks/util.py +++ b/scripts/tasks/util.py @@ -85,13 +85,15 @@ def get_model_python_version_requirements( req_less_than, min_version = None, None if os.path.exists(info): info_data = open(info).read() - req_less_than = re.search(r'python_version_less_than:\s*"([\d.]+)"', info_data) + req_less_than = re.search( + r'python_version_less_than:\s*["\']([\d.]+)["\']', info_data + ) if req_less_than: spl = req_less_than.group(1).split(".") req_less_than = int(spl[0]), int(spl[1]) min_version = re.search( - r'python_version_greater_than_or_equal_to:\s*"([\d.]+)"', info_data + r'python_version_greater_than_or_equal_to:\s*["\']([\d.]+)["\']', info_data ) if min_version: spl = min_version.group(1).split(".")