diff --git a/qai_hub_models/_version.py b/qai_hub_models/_version.py
index 5afe102e..49a2e97c 100644
--- a/qai_hub_models/_version.py
+++ b/qai_hub_models/_version.py
@@ -2,4 +2,4 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
-__version__ = "0.7.0"
+__version__ = "0.8.0"
diff --git a/qai_hub_models/asset_bases.yaml b/qai_hub_models/asset_bases.yaml
index 124db058..24e36b5f 100644
--- a/qai_hub_models/asset_bases.yaml
+++ b/qai_hub_models/asset_bases.yaml
@@ -6,6 +6,7 @@ model_asset_folder: models/{model_id}/v{version}
 dataset_asset_folder: datasets/{dataset_id}/v{version}
 repo_url: https://github.com/quic/ai-hub-models/blob/main
 qaihm_repo: qai_hub_models/models/{model_id}
+labels_path: qai_hub_models/labels/{labels_file}
 example_use: qai_hub_models/models/{model_id}#example--usage
 huggingface_path: qualcomm/{model_name}
 models_website_url: https://aihub.qualcomm.com
diff --git a/qai_hub_models/evaluators/classification_evaluator.py b/qai_hub_models/evaluators/classification_evaluator.py
index 2c00a4e3..c0f767c7 100644
--- a/qai_hub_models/evaluators/classification_evaluator.py
+++ b/qai_hub_models/evaluators/classification_evaluator.py
@@ -19,20 +19,32 @@ def __init__(self, num_classes: int = 1000):
     def add_batch(self, output: torch.Tensor, gt: int | torch.Tensor):
         # This evaluator supports only 1 output tensor at a time.
         assert len(output.shape) == 2 and output.shape[-1] == self.num_classes
-        gt_tensor = torch.Tensor(gt)
-        assert len(gt_tensor.shape) == 1 and gt_tensor.shape[0] == output.shape[0]
+        gt_tensor = torch.Tensor(gt).unsqueeze(1)
+        assert len(gt_tensor.shape) == 2 and gt_tensor.shape[0] == output.shape[0]
         batch_size = output.shape[0]
         self.total_samples += batch_size
-        self.num_correct += sum(torch.argmax(output, dim=-1) == gt_tensor)
+
+        top5 = torch.topk(output, 5).indices
+        self.top5_count += torch.sum(top5 == gt_tensor).item()
+        self.top1_count += torch.sum(top5[:, :1] == gt_tensor).item()

     def reset(self):
-        self.num_correct = 0
+        self.top1_count = 0
+        self.top5_count = 0
         self.total_samples = 0

-    def get_accuracy_score(self) -> float:
+    def top1(self) -> float:
         if self.total_samples == 0:
             return 0
-        return self.num_correct / self.total_samples
+        return self.top1_count / self.total_samples
+
+    def top5(self) -> float:
+        if self.total_samples == 0:
+            return 0
+        return self.top5_count / self.total_samples
+
+    def get_accuracy_score(self) -> float:
+        return self.top1()

     def formatted_accuracy(self) -> str:
-        return f"{self.get_accuracy_score() * 100:.1f}%"
+        return f"{self.top1() * 100:.1f}% (Top 1), {self.top5() * 100:.1f}% (Top 5)"
diff --git a/qai_hub_models/global_requirements.txt b/qai_hub_models/global_requirements.txt
index a9b38438..af284942 100644
--- a/qai_hub_models/global_requirements.txt
+++ b/qai_hub_models/global_requirements.txt
@@ -3,34 +3,45 @@
 # - Then install this requirements file
 # That should create an environment that works for every single model.

+# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY.
+ Deprecated==1.2.11 PySoundFile; sys_platform == 'win32' aimet-torch==1.31.2; sys_platform == "linux" albumentations==0.5.2 -av==10.0.0 basicsr==1.4.2 +boto3==1.34.119 +botocore==1.34.119 click==8.1.7 +coverage==5.3.1 data-gradients==0.3.1 datasets==2.14.5 diffusers[torch]==0.21.4 easydict==1.10 einops==0.3.2 -ffmpeg==1.4 ftfy==6.1.1 hydra-core==1.3.0 imageio[ffmpeg]==2.31.5 imagesize==1.4.1 +jinja2==3.0.3 +keyrings.envvars==1.1.0; python_version >= '3.9' # used only by CI kornia==0.5.0 librosa==0.10.1 mmcv==2.1.0 mmdet==3.2.0 mmpose==1.2.0 +mypy==0.991 object-detection-metrics==0.4.post1 openai-whisper==20230314 +pre-commit==3.5.0 pycocotools==2.0.7 +pytest-cov==4.1.0 +pytest-xdist==3.3.1 pytorch-lightning==1.6.0 rapidfuzz==3.8.1 -regex==2023.12.25 +regex==2023.10.3 +ruamel-yaml==0.18.6 +schema==0.7.5 scikit-image==0.21.0 scikit-learn==1.1.3 scipy==1.8.1 @@ -39,14 +50,18 @@ sentencepiece==0.2.0 shapely==2.0.3 soundfile==0.12.1 stringcase==1.2.0 +tensorboard==2.13.0 tflite==2.10.0 thop==0.1.1.post2209072238 -timm==0.9.11 -tensorboard==2.13.0 +timm==1.0.3 torchaudio==0.13.1 +torchmetrics==1.4.0.post0 transformers==4.41.1 treelib==1.6.1 -tucker-conv==1.0.1 +types-PyYAML==6.0.12.12 +types-pillow==10.2.0.20240213 +types-requests==2.31.0.6 +types-tabulate==0.9.0.20240106 ultralytics==8.0.193 webdataset==0.2.86 yacs==0.1.8 diff --git a/qai_hub_models/labels/cityscapes_labels.txt b/qai_hub_models/labels/cityscapes_labels.txt new file mode 100644 index 00000000..7bd4a33c --- /dev/null +++ b/qai_hub_models/labels/cityscapes_labels.txt @@ -0,0 +1,19 @@ +road +sidewalk +building +wall +fence +pole +traffic light +traffic sign +vegetation +terrain +sky +person +rider +car +truck +bus +train +motorcycle +bicycle diff --git a/qai_hub_models/labels/coco_labels.txt b/qai_hub_models/labels/coco_labels.txt new file mode 100644 index 00000000..941cb4e1 --- /dev/null +++ b/qai_hub_models/labels/coco_labels.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/qai_hub_models/labels/imagenet_labels.txt b/qai_hub_models/labels/imagenet_labels.txt new file mode 100644 index 00000000..f40829ed --- /dev/null +++ b/qai_hub_models/labels/imagenet_labels.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose 
snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet 
+monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military 
uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper 
+corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/qai_hub_models/labels/voc_labels.txt b/qai_hub_models/labels/voc_labels.txt new file mode 100644 index 00000000..c724f9aa --- /dev/null +++ b/qai_hub_models/labels/voc_labels.txt @@ -0,0 +1,21 @@ +BACKGROUND +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py index ff51e7f7..03325642 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/app.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/app.py @@ -5,7 +5,7 @@ from __future__ import annotations import os -from typing import Optional +from typing import Mapping, Optional, Tuple import numpy as np import torch @@ -25,6 +25,7 @@ MODEL_ID, ) from qai_hub_models.utils.asset_loaders import ASSET_CONFIG, SourceAsRoot +from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad def _load_cityscapes_loader(cityscapes_path: Optional[str] = None) -> object: @@ -92,9 +93,11 @@ class CityscapesSegmentationApp: def __init__( self, model: torch.nn.Module, + input_specs: Mapping[str, Tuple[Tuple[int, ...], str]], ): self.model = model self.color_mapping = _load_cityscapes_loader().dataset.color_mapping + (_, _, self.model_height, self.model_width) = input_specs["image"][0] def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: """ @@ -111,13 +114,18 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: WIDTH]. Note, that WIDTH and HEIGHT will be smaller than the input image. 
""" + resized_image, scale, padding = pil_resize_pad( + image, (self.model_height, self.model_width) + ) - input_tensor = preprocess_cityscapes_image(image) + input_tensor = preprocess_cityscapes_image(resized_image) with torch.no_grad(): small_res_output = self.model(input_tensor) output = F.interpolate( - small_res_output, (image.height, image.width), mode="bilinear" + small_res_output, + (resized_image.height, resized_image.width), + mode="bilinear", ) if raw_output: return output.detach().numpy() @@ -125,6 +133,9 @@ def predict(self, image: Image, raw_output: bool = False) -> Image | np.ndarray: color_mask = ImageModule.fromarray(predictions.astype(np.uint8)).convert("P") color_mask.putpalette(self.color_mapping) - out = ImageModule.blend(image, color_mask.convert("RGB"), 0.5) + out = ImageModule.blend(resized_image, color_mask.convert("RGB"), 0.5) + + # Resize / unpad annotated image + image_annotated = pil_undo_resize_pad(out, image.size, scale, padding) - return out + return image_annotated diff --git a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py index 25921362..2bab019b 100644 --- a/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py +++ b/qai_hub_models/models/_shared/cityscapes_segmentation/demo.py @@ -17,12 +17,12 @@ demo_model_from_cli_args, get_model_cli_parser, get_on_device_demo_parser, + input_spec_from_cli_args, validate_on_device_demo_args, ) from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.base_model import TargetRuntime from qai_hub_models.utils.display import display_or_save_image -from qai_hub_models.utils.image_processing import pil_resize_pad, pil_undo_resize_pad # This image showcases the Cityscapes classes (but is not from the dataset) TEST_CITYSCAPES_LIKE_IMAGE_NAME = "cityscapes_like_demo_2048x1024.jpg" @@ -58,22 +58,13 @@ def cityscapes_segmentation_demo( image = args.image image_name = os.path.basename(image) - input_spec = model_type.get_input_spec() - inference_model = demo_model_from_cli_args(model_type, model_id, args) - app = CityscapesSegmentationApp(inference_model) - - (_, _, height, width) = input_spec["image"][0] - orig_image = load_image(image) - image, scale, padding = pil_resize_pad(orig_image, (height, width)) + input_spec = input_spec_from_cli_args(inference_model, args) + app = CityscapesSegmentationApp(inference_model, input_spec) # Run app - image_annotated = app.predict(image) - - # Resize / unpad annotated image - image_annotated = pil_undo_resize_pad( - image_annotated, orig_image.size, scale, padding - ) + orig_image = load_image(image) + image_annotated = app.predict(orig_image) if not is_test: display_or_save_image( diff --git a/qai_hub_models/models/_shared/super_resolution/demo.py b/qai_hub_models/models/_shared/super_resolution/demo.py index 3ed3cb5d..f901b22d 100644 --- a/qai_hub_models/models/_shared/super_resolution/demo.py +++ b/qai_hub_models/models/_shared/super_resolution/demo.py @@ -8,23 +8,35 @@ from typing import List, Type from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp +from qai_hub_models.models._shared.super_resolution.model import ( + MODEL_ASSET_VERSION, + MODEL_ID, +) from qai_hub_models.utils.args import ( demo_model_from_cli_args, get_model_cli_parser, get_on_device_demo_parser, validate_on_device_demo_args, ) -from qai_hub_models.utils.asset_loaders import CachedWebAsset, load_image +from qai_hub_models.utils.asset_loaders import ( + 
CachedWebAsset, + CachedWebModelAsset, + load_image, +) from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.display import display_or_save_image +IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( + MODEL_ID, MODEL_ASSET_VERSION, "super_resolution_input.jpg" +) + # Run Super Resolution end-to-end on a sample image. # The demo will display both the input image and the higher resolution output. def super_resolution_demo( model_cls: Type[BaseModel], model_id: str, - default_image: str | CachedWebAsset, + default_image: str | CachedWebAsset = IMAGE_ADDRESS, is_test: bool = False, available_target_runtimes: List[TargetRuntime] = list( TargetRuntime.__members__.values() diff --git a/qai_hub_models/models/_shared/super_resolution/model.py b/qai_hub_models/models/_shared/super_resolution/model.py new file mode 100644 index 00000000..8042a4c1 --- /dev/null +++ b/qai_hub_models/models/_shared/super_resolution/model.py @@ -0,0 +1,67 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +import torch + +from qai_hub_models.evaluators.base_evaluators import BaseEvaluator +from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.input_spec import InputSpec + +MODEL_ID = __name__.split(".")[-2] +MODEL_ASSET_VERSION = 2 +DEFAULT_SCALE_FACTOR = 4 + + +def validate_scale_factor(scale_factor: int) -> None: + """Only these scales have pre-trained checkpoints available.""" + valid_scales = [2, 3, 4] + assert scale_factor in valid_scales, "`scale_factor` must be in : " + ", ".join( + valid_scales + ) + + +class SuperResolutionModel(BaseModel): + """Base Model for Super Resolution.""" + + def __init__( + self, + model: torch.nn.Module, + scale_factor: int, + ) -> None: + super().__init__() + self.model = model + self.scale_factor = scale_factor + + def get_evaluator(self) -> BaseEvaluator: + return SuperResolutionOutputEvaluator() + + def forward(self, image): + """ + Run Super Resolution on `image`, and produce an upscaled image + + Parameters: + image: Pixel values pre-processed for model consumption. + Range: float[0, 1] + 3-channel Color Space: RGB + + Returns: + image: Pixel values + Range: float[0, 1] + 3-channel Color Space: RGB + """ + return self.model(image) + + @staticmethod + def get_input_spec( + batch_size: int = 1, + num_channels: int = 3, + height: int = 128, + width: int = 128, + ) -> InputSpec: + # Get the input specification ordered (name -> (shape, type)) pairs for this model. + # + # This can be used with the qai_hub python API to declare + # the model input specification upon submitting a profile job. + return {"image": ((batch_size, num_channels, height, width), "float32")} diff --git a/qai_hub_models/models/aotgan/README.md b/qai_hub_models/models/aotgan/README.md index dd02b51a..89ec5bcb 100644 --- a/qai_hub_models/models/aotgan/README.md +++ b/qai_hub_models/models/aotgan/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of AOT-GAN can be found [here](https://github.com/taki0112/AttnGAN-Tensorflow/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Contextual Transformations for High-Resolution Image Inpainting](https://arxiv.org/abs/2104.01431) diff --git a/qai_hub_models/models/aotgan/export.py b/qai_hub_models/models/aotgan/export.py index e86faccd..ab8656d4 100644 --- a/qai_hub_models/models/aotgan/export.py +++ b/qai_hub_models/models/aotgan/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/aotgan/perf.yaml b/qai_hub_models/models/aotgan/perf.yaml index db1d414b..98f6395e 100644 --- a/qai_hub_models/models/aotgan/perf.yaml +++ b/qai_hub_models/models/aotgan/perf.yaml @@ -36,11 +36,11 @@ models: - name: AOT-GAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 164624.0 - throughput: 6.074448440081641 + inference_time: 164177.0 + throughput: 6.0909871662900406 estimated_peak_memory_range: - min: 5124096 - max: 8396488 + min: 3293184 + max: 6670400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1glkw4ep + job_id: j1gle2z8p job_status: Passed torchscript_onnx_qnn: - inference_time: 165008.0 - throughput: 6.06031222728595 + inference_time: 165278.0 + throughput: 6.050412033059452 estimated_peak_memory_range: - min: 3850240 - max: 32305264 + min: 4321280 + max: 32279608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jwgovdz45 + job_id: jwgoen0xp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.212193Z' + timestamp: '2024-06-08T22:15:16Z' - torchscript_onnx_tflite: - inference_time: 120767.0 - throughput: 8.280407727276492 + inference_time: 120342.0 + throughput: 8.309650828472188 estimated_peak_memory_range: - min: 2646016 - max: 222181760 + min: 2510848 + max: 224329120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jw561o2vp + job_id: jw56qzj0g job_status: Passed torchscript_onnx_qnn: - inference_time: 121460.0 - throughput: 8.233163181294254 + inference_time: 121373.0 + throughput: 8.2390647013751 estimated_peak_memory_range: - min: 1572864 - max: 147148656 + min: 0 + max: 141486816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j1pvw2q7g + job_id: j1pvzrojg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.339992Z' + timestamp: '2024-06-08T22:15:17Z' - torchscript_onnx_tflite: - inference_time: 164352.0 - throughput: 6.084501557632398 + inference_time: 164129.0 + throughput: 
6.092768493075568 estimated_peak_memory_range: - min: 3293184 - max: 6536160 + min: 12288 + max: 2291528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,14 +124,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p3monxg + job_id: j1p3q13l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 164668.0 - throughput: 6.072825321252459 + inference_time: 164665.0 + throughput: 6.072935960890292 estimated_peak_memory_range: - min: 4333568 - max: 28875248 + min: 4337664 + max: 28704480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: jlpev6o75 + job_id: jlpe4w115 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.469404Z' + timestamp: '2024-06-08T22:15:19Z' - torchscript_onnx_qnn: - inference_time: 145505.0 - throughput: 6.872616061303735 + inference_time: 145570.0 + throughput: 6.869547296833138 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 275 - job_id: j7gjl3d7p + job_id: j7gjk2mx5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +171,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:39.597982Z' + timestamp: '2024-06-08T22:15:18Z' diff --git a/qai_hub_models/models/baichuan_7b_quantized/README.md b/qai_hub_models/models/baichuan_7b_quantized/README.md index cff30e46..7a79da7a 100644 --- a/qai_hub_models/models/baichuan_7b_quantized/README.md +++ b/qai_hub_models/models/baichuan_7b_quantized/README.md @@ -20,7 +20,7 @@ a hosted Qualcomm® device. ## License - The license for the original implementation of Baichuan-7B can be found [here](https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/baichuan-inc/Baichuan-7B/blob/main/LICENSE) ## References * [Baichuan 2: Open Large-scale Language Models](https://arxiv.org/abs/2309.10305) diff --git a/qai_hub_models/models/common.py b/qai_hub_models/models/common.py index bc0886c5..fc75c06c 100644 --- a/qai_hub_models/models/common.py +++ b/qai_hub_models/models/common.py @@ -12,13 +12,16 @@ class TargetRuntime(Enum): TFLITE = 0 QNN = 1 ORT = 2 + PRECOMPILED_ORT = 3 def __str__(self): return self.name.lower() @property def long_name(self): - return f"torchscript_onnx_{self.name.lower()}" + if "precompiled" not in self.name.lower(): + return f"torchscript_onnx_{self.name.lower()}" + return f"{self.name.lower()}" class SourceModelFormat(Enum): diff --git a/qai_hub_models/models/controlnet_quantized/README.md b/qai_hub_models/models/controlnet_quantized/README.md index 2c155773..6e992553 100644 --- a/qai_hub_models/models/controlnet_quantized/README.md +++ b/qai_hub_models/models/controlnet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ControlNet can be found [here](https://github.com/lllyasviel/ControlNet/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/lllyasviel/ControlNet/blob/main/LICENSE) ## References * [Adding Conditional Control to Text-to-Image Diffusion Models](https://arxiv.org/abs/2302.05543) diff --git a/qai_hub_models/models/convnext_tiny/README.md b/qai_hub_models/models/convnext_tiny/README.md index 9e71c767..0c961ecb 100644 --- a/qai_hub_models/models/convnext_tiny/README.md +++ b/qai_hub_models/models/convnext_tiny/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny/evaluate.py b/qai_hub_models/models/convnext_tiny/evaluate.py new file mode 100644 index 00000000..1ae449fa --- /dev/null +++ b/qai_hub_models/models/convnext_tiny/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny/export.py b/qai_hub_models/models/convnext_tiny/export.py index 7342e877..3e52ca90 100644 --- a/qai_hub_models/models/convnext_tiny/export.py +++ b/qai_hub_models/models/convnext_tiny/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/convnext_tiny/info.yaml b/qai_hub_models/models/convnext_tiny/info.yaml index f3e7e9e1..9bbe75fa 100644 --- a/qai_hub_models/models/convnext_tiny/info.yaml +++ b/qai_hub_models/models/convnext_tiny/info.yaml @@ -38,3 +38,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny/perf.yaml b/qai_hub_models/models/convnext_tiny/perf.yaml index 80ea16f7..70048eca 100644 --- a/qai_hub_models/models/convnext_tiny/perf.yaml +++ b/qai_hub_models/models/convnext_tiny/perf.yaml @@ -36,11 +36,11 @@ models: - name: ConvNext-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 5749.0 - throughput: 173.94329448599757 + inference_time: 5717.0 + throughput: 174.91691446562882 estimated_peak_memory_range: - min: 3657728 - max: 74915816 + min: 45056 + max: 2631376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jvgdv6nkg + job_id: jvgd7qleg job_status: Passed torchscript_onnx_qnn: - inference_time: 3812.0 - throughput: 262.3294858342078 + inference_time: 3769.0 + throughput: 265.32236667551075 estimated_peak_memory_range: - min: 12288 - max: 202348976 + min: 81920 + max: 202159384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: j0px109jg + job_id: j0pxe6215 job_status: Passed torchscript_onnx_ort: - inference_time: 16318.0 - throughput: 61.282019855374436 + inference_time: 16427.0 + throughput: 60.875388080599016 estimated_peak_memory_range: - min: 16384 - max: 153639296 + min: 110592 + max: 152489568 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jep2mo0x5 + job_id: jep239n4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.628526Z' + timestamp: '2024-06-08T22:16:04Z' - torchscript_onnx_tflite: - inference_time: 3967.0 - throughput: 252.07965717166624 + inference_time: 3988.0 + throughput: 250.75225677031094 estimated_peak_memory_range: min: 16384 - max: 211805024 + max: 212477920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jz57do2q5 + job_id: jz57vl3l5 job_status: Passed torchscript_onnx_qnn: inference_time: 2732.0 throughput: 366.03221083455344 estimated_peak_memory_range: - min: 0 - max: 90889216 + min: 741376 + max: 87297136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jo5mz9eyp + job_id: jo5mv6yw5 job_status: Passed torchscript_onnx_ort: - inference_time: 11793.0 - throughput: 84.79606546256254 + inference_time: 11884.0 + throughput: 84.14675193537529 estimated_peak_memory_range: - min: 618496 - max: 60160448 + min: 139571200 + max: 200346752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jqpyd8rrp + job_id: jqpyvj07p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.628637Z' + timestamp: '2024-06-08T22:16:05Z' - torchscript_onnx_tflite: - inference_time: 5705.0 - throughput: 175.28483786152498 + inference_time: 5701.0 + throughput: 175.40782318891422 estimated_peak_memory_range: - min: 65536 - max: 2353464 + min: 49152 + max: 2985728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 328 - job_id: jqp4wenqg + job_id: jqp4jd0vp job_status: Passed torchscript_onnx_qnn: - inference_time: 3788.0 - throughput: 263.99155227032736 + inference_time: 3779.0 + throughput: 264.6202699126753 estimated_peak_memory_range: - min: 86016 - max: 201872936 + min: 94208 + max: 182002576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jopryx8vg + job_id: jopr12j9g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.628718Z' + timestamp: '2024-06-08T22:16:03Z' - torchscript_onnx_qnn: - inference_time: 3927.0 - throughput: 254.64731347084287 + inference_time: 3907.0 + throughput: 255.9508574353724 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jegne1lvg + job_id: jegnrm8r5 job_status: Passed torchscript_onnx_ort: - inference_time: 17066.0 - throughput: 58.59603890776984 + inference_time: 16908.0 + throughput: 59.143600662408325 estimated_peak_memory_range: - min: 449466368 - max: 449466368 + min: 294563840 + max: 294563840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 189 - job_id: j2p0ro32p + job_id: j2p0e2765 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:39.628784Z' + timestamp: '2024-06-08T22:16:06Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md index b7a7d0e7..a62f06f9 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny-w8a16-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py new file mode 100644 index 00000000..362002ba --- /dev/null +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/evaluate.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny_w8a16_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_tflite=False, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py index beeafaae..39153360 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml index 5370c05d..be8bde1b 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml index 0ae7d848..431c0b4f 100644 --- a/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a16_quantized/perf.yaml @@ -8,10 +8,8 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra @@ -30,7 +28,6 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - 
Qcs6490 - Qcs8250 - Qcs8550 - Snapdragon® 8 Gen 1 @@ -42,11 +39,11 @@ models: - name: ConvNext-Tiny-w8a16-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 3253.0 - throughput: 307.40854595757764 + inference_time: 3272.0 + throughput: 305.6234718826406 estimated_peak_memory_range: - min: 12288 - max: 138858328 + min: 323584 + max: 8383168 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +51,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jw561oevp + job_id: jogkrqm25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +60,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:39.928338Z' + timestamp: '2024-06-08T22:16:49Z' - torchscript_onnx_qnn: - inference_time: 2291.0 - throughput: 436.4906154517678 + inference_time: 2286.0 + throughput: 437.4453193350831 estimated_peak_memory_range: - min: 315392 - max: 90690416 + min: 0 + max: 90112528 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +74,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1p3movxg + job_id: jn5q9ro4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +83,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:39.928544Z' + timestamp: '2024-06-08T22:16:50Z' - torchscript_onnx_qnn: - inference_time: 3277.0 - throughput: 305.15715593530666 + inference_time: 3255.0 + throughput: 307.21966205837174 estimated_peak_memory_range: - min: 319488 - max: 8809304 + min: 16384 + max: 11232112 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +97,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j1pvw207g + job_id: jw56qzl0g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,44 +106,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:39.928727Z' - - torchscript_onnx_qnn: - inference_time: 11165.0 - throughput: 89.56560680698611 - estimated_peak_memory_range: - min: 323584 - max: 98049920 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: j7gjl3z7p - job_status: Passed - reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.073528Z' - - reference_device_info: - name: RB5 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.202310Z' + timestamp: '2024-06-08T22:16:52Z' - torchscript_onnx_qnn: - inference_time: 3621.0 - throughput: 276.16680475006905 + inference_time: 3567.0 + throughput: 280.3476310625175 estimated_peak_memory_range: - min: 417792 - max: 417792 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: int8 layer_info: @@ -154,7 +120,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jwgovdk45 + job_id: j1gle2r8p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -163,4 +129,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.203221Z' + timestamp: '2024-06-08T22:16:51Z' diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md index 35d6bf68..d913e14e 100644 
--- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ConvNext-Tiny-w8a8-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py new file mode 100644 index 00000000..76c29397 --- /dev/null +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/evaluate.py @@ -0,0 +1,64 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.convnext_tiny_w8a8_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_tflite=False, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py index 787f0f4f..54c73379 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" 
os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,7 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_ort=False) + parser = export_parser(model_cls=Model, supports_tflite=False, supports_ort=False) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml index b3770255..a27d24b3 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml index 08bb11f7..cc741e4a 100644 --- a/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml +++ b/qai_hub_models/models/convnext_tiny_w8a8_quantized/perf.yaml @@ -8,10 +8,8 @@ aggregated: - Google Pixel 4 - Google Pixel 4a - Google Pixel 5a 5G - - QCS6490 (Proxy) - QCS8250 (Proxy) - QCS8550 (Proxy) - - RB3 Gen 2 (Proxy) - RB5 (Proxy) - Samsung Galaxy S21 - Samsung Galaxy S21 Ultra @@ -30,7 +28,6 @@ aggregated: - Xiaomi 12 - Xiaomi 12 Pro supported_chipsets: - - Qcs6490 - Qcs8250 - Qcs8550 - Snapdragon® 8 Gen 1 @@ -42,11 +39,11 @@ models: - name: ConvNext-Tiny-w8a8-Quantized performance_metrics: - torchscript_onnx_qnn: - inference_time: 1732.0 - throughput: 577.3672055427252 + inference_time: 1723.0 + throughput: 580.3830528148578 estimated_peak_memory_range: - min: 28672 - max: 12392608 + min: 12288 + max: 127334120 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +51,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jvgdv6ekg + job_id: jygzvjkkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +60,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.231037Z' + timestamp: '2024-06-08T22:17:38Z' - torchscript_onnx_qnn: - inference_time: 1204.0 - throughput: 830.5647840531561 + inference_time: 1207.0 + throughput: 828.5004142502071 estimated_peak_memory_range: - min: 163840 - max: 89393856 + min: 12288 + max: 87553664 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +74,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jz57do0q5 + job_id: jz5wmq66g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +83,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.231081Z' + timestamp: '2024-06-08T22:17:39Z' - torchscript_onnx_qnn: - inference_time: 1729.0 - throughput: 578.368999421631 + inference_time: 1724.0 + throughput: 580.046403712297 estimated_peak_memory_range: - min: 172032 - max: 8506368 + min: 20480 + max: 10474536 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +97,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: j0px10njg + job_id: jnp1qez2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +106,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.231120Z' + timestamp: '2024-06-08T22:17:41Z' - torchscript_onnx_qnn: - inference_time: 6345.0 - throughput: 157.60441292356185 + inference_time: 1917.0 + throughput: 521.6484089723526 
estimated_peak_memory_range: - min: 163840 - max: 87882064 + min: 503808 + max: 503808 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,38 +120,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 215 - job_id: jo5mz9qyp - job_status: Passed - reference_device_info: - name: RB3 Gen 2 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.231157Z' - - reference_device_info: - name: RB5 (Proxy) - os: '12' - form_factor: Iot - os_name: Android - manufacturer: Qualcomm - chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.231162Z' - - torchscript_onnx_qnn: - inference_time: 1931.0 - throughput: 517.8663904712585 - estimated_peak_memory_range: - min: 499712 - max: 499712 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 215 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 215 - job_id: jqp4wekqg + job_id: jmg99wnlg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -163,4 +129,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.231201Z' + timestamp: '2024-06-08T22:17:40Z' diff --git a/qai_hub_models/models/ddrnet23_slim/README.md b/qai_hub_models/models/ddrnet23_slim/README.md index 22b47996..a72d09f7 100644 --- a/qai_hub_models/models/ddrnet23_slim/README.md +++ b/qai_hub_models/models/ddrnet23_slim/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DDRNet23-Slim can be found [here](https://github.com/chenjun2hao/DDRNet.pytorch/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes](https://arxiv.org/abs/2101.06085) diff --git a/qai_hub_models/models/ddrnet23_slim/export.py b/qai_hub_models/models/ddrnet23_slim/export.py index 8ed00b91..6f64655c 100644 --- a/qai_hub_models/models/ddrnet23_slim/export.py +++ b/qai_hub_models/models/ddrnet23_slim/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -224,7 +224,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ddrnet23_slim/info.yaml b/qai_hub_models/models/ddrnet23_slim/info.yaml index cf0776a3..02c624fd 100644 --- a/qai_hub_models/models/ddrnet23_slim/info.yaml +++ b/qai_hub_models/models/ddrnet23_slim/info.yaml @@ -37,3 +37,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ddrnet23_slim/perf.yaml b/qai_hub_models/models/ddrnet23_slim/perf.yaml index 62939a16..0bb7c379 100644 --- 
a/qai_hub_models/models/ddrnet23_slim/perf.yaml +++ b/qai_hub_models/models/ddrnet23_slim/perf.yaml @@ -36,11 +36,11 @@ models: - name: DDRNet23-Slim performance_metrics: - torchscript_onnx_tflite: - inference_time: 6730.0 - throughput: 148.5884101040119 + inference_time: 6650.0 + throughput: 150.37593984962405 estimated_peak_memory_range: - min: 1011712 - max: 3811568 + min: 57344 + max: 27662296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j2p0ro22p + job_id: jo5mv3xw5 job_status: Passed torchscript_onnx_ort: - inference_time: 9468.0 - throughput: 105.61892691170257 + inference_time: 9735.0 + throughput: 102.7221366204417 estimated_peak_memory_range: - min: 13135872 - max: 50337872 + min: 12599296 + max: 48937112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jn5q24r75 + job_id: jep23ly4g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.256306Z' + timestamp: '2024-06-08T22:18:08Z' - torchscript_onnx_tflite: - inference_time: 4767.0 - throughput: 209.77554017201595 + inference_time: 4742.0 + throughput: 210.88148460565162 estimated_peak_memory_range: min: 16384 - max: 73077936 + max: 73234384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: j1p87jmz5 + job_id: jegnr3vr5 job_status: Passed torchscript_onnx_ort: - inference_time: 6106.0 - throughput: 163.77333770062233 + inference_time: 6012.0 + throughput: 166.333998669328 estimated_peak_memory_range: - min: 327680 - max: 40134176 + min: 524288 + max: 42757008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: j1glkw2ep + job_id: jqpyv637p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.256364Z' + timestamp: '2024-06-08T22:18:09Z' - torchscript_onnx_tflite: - inference_time: 6723.0 - throughput: 148.74312063067083 + inference_time: 6672.0 + throughput: 149.8800959232614 estimated_peak_memory_range: - min: 1007616 - max: 3000632 + min: 991232 + max: 15704000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 131 - job_id: jogky6qyp + job_id: jopr1e39g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.256392Z' + timestamp: '2024-06-08T22:18:06Z' - torchscript_onnx_ort: - inference_time: 9545.0 - throughput: 104.76689366160294 + inference_time: 9609.0 + throughput: 104.06910188365075 estimated_peak_memory_range: - min: 13291520 - max: 13291520 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 155 - job_id: jw561ozvp + job_id: j2p0el065 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.256424Z' + timestamp: 
'2024-06-08T22:18:10Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md index 49ab4d78..96ebfd8f 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-Plus-MobileNet can be found [here](https://github.com/jfzhang95/pytorch-deeplab-xception/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py index 08966976..e1a7394c 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml index ca033e0e..a8ca02ac 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/info.yaml @@ -35,3 +35,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - VOC2012 +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml index 43963ab9..dc3430eb 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet/perf.yaml @@ -36,11 +36,11 @@ models: - name: DeepLabV3-Plus-MobileNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 13093.0 - throughput: 76.37668983426258 + inference_time: 13047.0 + throughput: 76.64597225415804 estimated_peak_memory_range: min: 21032960 - max: 67752032 + max: 22679264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: jwgovdn45 + job_id: jogkr3x25 job_status: Passed torchscript_onnx_qnn: - inference_time: 12869.0 - throughput: 77.70611547128759 + inference_time: 12852.0 + throughput: 77.8089013383131 estimated_peak_memory_range: - min: 3112960 - max: 21269072 + min: 4210688 + max: 20359032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jlpev6w75 + job_id: jw56qn40g job_status: Passed torchscript_onnx_ort: - inference_time: 17611.0 - throughput: 56.78269263528477 + inference_time: 17763.0 + throughput: 56.296796712267074 estimated_peak_memory_range: - min: 44322816 - max: 75546832 + min: 40357888 + max: 70272240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp18owkg + job_id: j7gjkenx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.282247Z' + timestamp: '2024-06-08T22:18:39Z' - torchscript_onnx_tflite: - inference_time: 9834.0 - throughput: 101.6880211511084 + inference_time: 9612.0 + throughput: 104.03662089055348 estimated_peak_memory_range: - min: 20480 - max: 67577072 + min: 32768 + max: 69905408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j1pvw2r7g + job_id: jn5q93q4p job_status: Passed torchscript_onnx_qnn: - inference_time: 9421.0 - throughput: 106.14584439019212 + inference_time: 9482.0 + throughput: 105.4629824931449 estimated_peak_memory_range: - min: 3194880 - max: 58526144 + min: 3174400 + max: 58616848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jygz7zjzp + job_id: j1p3qe0l5 job_status: Passed torchscript_onnx_ort: - inference_time: 14395.0 - throughput: 69.46856547412295 + inference_time: 13976.0 + throughput: 71.55123068116771 estimated_peak_memory_range: - min: 28495872 - max: 65141056 + min: 53886976 + max: 88707568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv6qkg + job_id: jlpe4km15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.282320Z' + timestamp: '2024-06-08T22:18:40Z' - torchscript_onnx_tflite: - inference_time: 13172.0 - throughput: 75.91861524445794 + inference_time: 13150.0 + throughput: 76.04562737642586 estimated_peak_memory_range: - min: 22175744 - max: 38738920 + min: 22147072 + max: 24149720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 98 - job_id: j7gjl327p + job_id: j1gle3m8p job_status: Passed torchscript_onnx_qnn: - inference_time: 12913.0 - throughput: 77.44133818632386 + inference_time: 12879.0 + throughput: 77.64577995185962 estimated_peak_memory_range: - min: 3186688 - max: 18745072 + min: 3198976 + max: 19885424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jmg94oyq5 + job_id: j1pvzvkjg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.282366Z' + timestamp: '2024-06-08T22:18:38Z' - torchscript_onnx_qnn: - inference_time: 16505.0 - throughput: 60.58770069675856 + inference_time: 16510.0 + throughput: 60.56935190793458 estimated_peak_memory_range: min: 3170304 max: 3170304 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jz5w9y3zp + job_id: jwgoe36xp job_status: Passed torchscript_onnx_ort: - inference_time: 16741.0 - throughput: 59.73358819664297 + inference_time: 16653.0 + throughput: 60.04924037710923 estimated_peak_memory_range: - min: 102998016 - max: 102998016 + min: 105144320 + max: 105144320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9y3jp + job_id: jygzvrdkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.282417Z' + timestamp: '2024-06-08T22:18:41Z' diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md index 9ee23f8b..79770236 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-Plus-MobileNet-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py index 95facb42..88702832 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml index 80402e21..d8a6ff9e 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/info.yaml @@ -37,3 +37,4 @@ license_type: mit deploy_license_type: AI Model Hub License dataset: - VOC2012 +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml index 76310879..d4f699a6 100644 --- a/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml +++ b/qai_hub_models/models/deeplabv3_plus_mobilenet_quantized/perf.yaml @@ -42,26 +42,26 @@ models: - name: DeepLabV3-Plus-MobileNet-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 3331.0 - throughput: 300.2101471029721 + inference_time: 3596.0 + throughput: 278.08676307007784 estimated_peak_memory_range: - min: 12288 - max: 3077704 + min: 16384 + max: 1830768 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jnp18owlg + total_layers: 102 + job_id: jmg99wllg job_status: Passed torchscript_onnx_qnn: - inference_time: 5345.0 - throughput: 187.0907390084191 + inference_time: 5322.0 + throughput: 187.89928598271325 estimated_peak_memory_range: - min: 172032 - max: 46772064 + 
min: 806912 + max: 7295144 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jo5mz93qp + job_id: jnp1qe48g job_status: Passed torchscript_onnx_ort: - inference_time: 18725.0 - throughput: 53.4045393858478 + inference_time: 16175.0 + throughput: 61.82380216383308 estimated_peak_memory_range: - min: 114765824 - max: 132619664 + min: 42803200 + max: 54255496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: j2p0rolep + total_layers: 171 + job_id: j0pxeyk35 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,28 +93,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.316733Z' + timestamp: '2024-06-08T22:19:29Z' - torchscript_onnx_tflite: - inference_time: 2493.0 - throughput: 401.1231448054553 + inference_time: 2668.0 + throughput: 374.8125937031484 estimated_peak_memory_range: min: 12288 - max: 57316816 + max: 60104416 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jvgdv6qlg + total_layers: 102 + job_id: jnp1qe42g job_status: Passed torchscript_onnx_qnn: - inference_time: 3954.0 - throughput: 252.90844714213455 + inference_time: 3937.0 + throughput: 254.00050800101602 estimated_peak_memory_range: min: 802816 - max: 61326896 + max: 61474288 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jegne13mg + job_id: jvgd7oxrg job_status: Passed torchscript_onnx_ort: - inference_time: 13415.0 - throughput: 74.54342154304882 + inference_time: 12210.0 + throughput: 81.9000819000819 estimated_peak_memory_range: - min: 112525312 - max: 167003632 + min: 33013760 + max: 87227648 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: j1p87jz85 + total_layers: 171 + job_id: jo5mv3nd5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,28 +146,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.316808Z' + timestamp: '2024-06-08T22:19:30Z' - torchscript_onnx_tflite: - inference_time: 3344.0 - throughput: 299.0430622009569 + inference_time: 3596.0 + throughput: 278.08676307007784 estimated_peak_memory_range: min: 12288 - max: 1744048 + max: 8750088 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 99 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 99 - job_id: jz57doxr5 + total_layers: 102 + job_id: jvgd7oxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 5365.0 - throughput: 186.39328984156572 + inference_time: 5333.0 + throughput: 187.51171948246764 estimated_peak_memory_range: - min: 950272 - max: 81304632 + min: 20480 + max: 12661968 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jep2molm5 + job_id: jqp4jvl8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,36 +184,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.316853Z' + timestamp: '2024-06-08T22:19:28Z' - torchscript_onnx_tflite: - inference_time: 
15002.0 - throughput: 66.65777896280495 + inference_time: 14989.0 + throughput: 66.71559143371806 estimated_peak_memory_range: - min: 5537792 - max: 44275008 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 99 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 99 - job_id: jqp4wevlg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 19890.0 - throughput: 50.27652086475616 - estimated_peak_memory_range: - min: 839680 - max: 50580512 + min: 12288 + max: 39155056 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 100 + layers_on_npu: 102 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 100 - job_id: jqpyd864p + total_layers: 102 + job_id: jz5wmqe3g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,21 +207,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.316896Z' + timestamp: '2024-06-08T22:19:23Z' - torchscript_onnx_tflite: - inference_time: 120249.0 - throughput: 8.316077472577735 + inference_time: 126163.0 + throughput: 7.926254131559966 estimated_peak_memory_range: - min: 11714560 - max: 29650216 + min: 11575296 + max: 14463800 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 96 + layers_on_npu: 99 layers_on_gpu: 3 layers_on_cpu: 0 - total_layers: 99 - job_id: j0px10y9g + total_layers: 102 + job_id: jmg99wlwg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.316921Z' + timestamp: '2024-06-08T22:19:24Z' - torchscript_onnx_qnn: - inference_time: 5376.0 - throughput: 186.01190476190476 + inference_time: 5241.0 + throughput: 190.80328181644725 estimated_peak_memory_range: - min: 790528 - max: 790528 + min: 798720 + max: 798720 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 100 - job_id: jopryxeeg + job_id: jz57vxyv5 job_status: Passed torchscript_onnx_ort: - inference_time: 33438.0 - throughput: 29.906094862132903 + inference_time: 22921.0 + throughput: 43.628113956633655 estimated_peak_memory_range: - min: 131166208 - max: 131166208 + min: 59097088 + max: 59097088 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 122 + layers_on_npu: 120 layers_on_gpu: 0 layers_on_cpu: 51 - total_layers: 173 - job_id: jogky63op + total_layers: 171 + job_id: jegnr36k5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.316975Z' + timestamp: '2024-06-08T22:19:31Z' diff --git a/qai_hub_models/models/deeplabv3_resnet50/README.md b/qai_hub_models/models/deeplabv3_resnet50/README.md index 1fec3ba3..c7cf9fab 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/README.md +++ b/qai_hub_models/models/deeplabv3_resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DeepLabV3-ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) diff --git a/qai_hub_models/models/deeplabv3_resnet50/export.py b/qai_hub_models/models/deeplabv3_resnet50/export.py index b73993c7..1f3fda9b 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/export.py +++ b/qai_hub_models/models/deeplabv3_resnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/deeplabv3_resnet50/info.yaml b/qai_hub_models/models/deeplabv3_resnet50/info.yaml index 6ae16371..ca44a6e5 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/info.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/info.yaml @@ -35,3 +35,4 @@ has_animated_banner: yes license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: [] +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml index 5f4591d3..a21c01a6 100644 --- a/qai_hub_models/models/deeplabv3_resnet50/perf.yaml +++ b/qai_hub_models/models/deeplabv3_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: DeepLabV3-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 290505.0 - throughput: 3.4422815442075008 + inference_time: 292980.0 + throughput: 3.413202266366305 estimated_peak_memory_range: - min: 4493312 - max: 181829312 + min: 2162688 + max: 149701296 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j1glkw3lp + job_id: jep23lkrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.360596Z' + timestamp: '2024-06-08T22:19:58Z' - torchscript_onnx_tflite: - inference_time: 217580.0 - throughput: 4.596010662744738 + inference_time: 223885.0 + throughput: 4.466578823949796 estimated_peak_memory_range: - min: 20635648 - max: 55850720 + min: 65536 + max: 32739680 primary_compute_unit: GPU precision: fp16 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: jw561on7p + job_id: jqpyv618p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.360628Z' + timestamp: '2024-06-08T22:19:59Z' - torchscript_onnx_tflite: - inference_time: 293257.0 - throughput: 3.4099782784383663 + inference_time: 291243.0 + 
throughput: 3.4335589181542563 estimated_peak_memory_range: - min: 2183168 - max: 149075440 + min: 5476352 + max: 182706000 primary_compute_unit: GPU precision: fp16 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 95 layers_on_cpu: 0 total_layers: 95 - job_id: j1p3moezg + job_id: j2p0elz95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.360656Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.360664Z' + timestamp: '2024-06-08T22:20:00Z' diff --git a/qai_hub_models/models/densenet121/README.md b/qai_hub_models/models/densenet121/README.md index 1f95a118..a4221d1c 100644 --- a/qai_hub_models/models/densenet121/README.md +++ b/qai_hub_models/models/densenet121/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DenseNet-121 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) diff --git a/qai_hub_models/models/densenet121/evaluate.py b/qai_hub_models/models/densenet121/evaluate.py new file mode 100644 index 00000000..64361a06 --- /dev/null +++ b/qai_hub_models/models/densenet121/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.densenet121 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/densenet121/export.py b/qai_hub_models/models/densenet121/export.py index 9e6eb456..9f0c3592 100644 --- a/qai_hub_models/models/densenet121/export.py +++ b/qai_hub_models/models/densenet121/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/densenet121/info.yaml b/qai_hub_models/models/densenet121/info.yaml index 9d1dda3b..a14749dd 100644 --- a/qai_hub_models/models/densenet121/info.yaml +++ b/qai_hub_models/models/densenet121/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/densenet121/perf.yaml b/qai_hub_models/models/densenet121/perf.yaml index 3b6037e6..4cfd87c1 100644 --- a/qai_hub_models/models/densenet121/perf.yaml +++ b/qai_hub_models/models/densenet121/perf.yaml @@ -36,11 +36,11 @@ models: - name: DenseNet-121 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1945.0 - throughput: 514.1388174807198 + inference_time: 1946.0 + throughput: 513.874614594039 estimated_peak_memory_range: - min: 12288 - max: 20617520 + min: 20480 + max: 2555328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jnp18oelg + job_id: jygzvr2op job_status: Passed torchscript_onnx_qnn: - inference_time: 1983.0 - throughput: 504.2864346949067 + inference_time: 1998.0 + throughput: 500.5005005005005 estimated_peak_memory_range: - min: 12288 - max: 29686216 + min: 647168 + max: 7884416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jqp4wewlg + job_id: jnp1qe28g job_status: Passed torchscript_onnx_ort: - inference_time: 1988.0 - throughput: 503.01810865191146 + inference_time: 1954.0 + throughput: 511.77072671443193 estimated_peak_memory_range: - min: 12288 - max: 45851224 + min: 16384 + max: 41751336 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jopryxyeg + job_id: j0pxey935 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.378424Z' + timestamp: '2024-06-08T22:20:34Z' - torchscript_onnx_tflite: inference_time: 1318.0 throughput: 758.7253414264036 estimated_peak_memory_range: min: 12288 - max: 96176016 + max: 96529440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jvgdv6olg + job_id: jz5wmqw3g job_status: Passed torchscript_onnx_qnn: - inference_time: 1322.0 - throughput: 756.4296520423601 + inference_time: 1329.0 + throughput: 752.4454477050414 estimated_peak_memory_range: - min: 638976 - max: 160369936 + min: 618496 + max: 158201904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: j0px1019g + job_id: jvgd7onrg job_status: Passed torchscript_onnx_ort: - inference_time: 1313.0 - throughput: 761.6146230007616 + inference_time: 1326.0 + throughput: 754.1478129713424 estimated_peak_memory_range: - min: 0 - max: 51247376 + min: 618496 + max: 52734944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jep2momm5 + job_id: jo5mv3ed5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.378568Z' + timestamp: '2024-06-08T22:20:35Z' - torchscript_onnx_tflite: - inference_time: 1941.0 - throughput: 515.1983513652756 + inference_time: 1932.0 + throughput: 517.5983436853002 estimated_peak_memory_range: - min: 12288 - max: 3049672 + min: 24576 + max: 2118480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 312 - job_id: jz57dodr5 + job_id: jmg99w0wg job_status: Passed torchscript_onnx_qnn: - inference_time: 1994.0 - throughput: 501.5045135406219 + inference_time: 1991.0 + throughput: 502.26017076845807 estimated_peak_memory_range: - min: 630784 - max: 6220120 + min: 16384 + max: 39662792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jegne1emg + job_id: jqp4jvn8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.378662Z' + timestamp: '2024-06-08T22:20:33Z' - torchscript_onnx_qnn: - inference_time: 2221.0 - throughput: 450.24763619990995 + inference_time: 2224.0 + throughput: 449.64028776978415 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 372 - job_id: jo5mz9zqp + job_id: jz57vx2v5 job_status: Passed torchscript_onnx_ort: - inference_time: 2070.0 - throughput: 483.09178743961354 + inference_time: 2023.0 + throughput: 494.3153732081068 estimated_peak_memory_range: - min: 634880 - max: 634880 + min: 647168 + max: 647168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 374 - job_id: jqpyd8d4p + job_id: jegnr30k5 job_status: 
Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.378764Z' + timestamp: '2024-06-08T22:20:36Z' diff --git a/qai_hub_models/models/detr_resnet101/README.md b/qai_hub_models/models/detr_resnet101/README.md index adaff2fc..662a86c6 100644 --- a/qai_hub_models/models/detr_resnet101/README.md +++ b/qai_hub_models/models/detr_resnet101/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet101 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101/export.py b/qai_hub_models/models/detr_resnet101/export.py index 863e0358..8456a642 100644 --- a/qai_hub_models/models/detr_resnet101/export.py +++ b/qai_hub_models/models/detr_resnet101/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101/perf.yaml b/qai_hub_models/models/detr_resnet101/perf.yaml index 3bfa8d81..ab111618 100644 --- a/qai_hub_models/models/detr_resnet101/perf.yaml +++ b/qai_hub_models/models/detr_resnet101/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 24796.0 - throughput: 40.32908533634457 + inference_time: 24522.0 + throughput: 40.779708017290595 estimated_peak_memory_range: - min: 430080 - max: 3276392 + min: 405504 + max: 3620824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: j1p87j785 + job_id: jep23lxrg job_status: Passed torchscript_onnx_ort: - inference_time: 22705.0 - throughput: 44.04316229905307 + inference_time: 22510.0 + throughput: 44.4247001332741 estimated_peak_memory_range: - min: 2146304 - max: 309086736 + min: 53248 + max: 301197496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1pvw2wmg + job_id: jw56qn26g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.413162Z' + timestamp: '2024-06-08T22:21:16Z' - torchscript_onnx_tflite: - inference_time: 17296.0 - throughput: 57.816836262719704 + inference_time: 17593.0 + throughput: 56.840788950150625 
estimated_peak_memory_range: - min: 36864 - max: 283795968 + min: 385024 + max: 284374432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: jogky6yop + job_id: jqpyv6z8p job_status: Passed torchscript_onnx_ort: - inference_time: 16129.0 - throughput: 62.000124000248 + inference_time: 15841.0 + throughput: 63.127327820213374 estimated_peak_memory_range: - min: 2801664 - max: 112669552 + min: 38055936 + max: 153822592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j7gjl3l8p + job_id: j1p3qen35 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.413365Z' + timestamp: '2024-06-08T22:21:17Z' - torchscript_onnx_tflite: - inference_time: 24577.0 - throughput: 40.68844854945681 + inference_time: 24627.0 + throughput: 40.60583911966541 estimated_peak_memory_range: - min: 434176 - max: 3536864 + min: 413696 + max: 3309184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 839 - job_id: jn5q242m5 + job_id: j2p0el495 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.413467Z' + timestamp: '2024-06-08T22:21:11Z' - torchscript_onnx_ort: - inference_time: 22988.0 - throughput: 43.50095702105446 + inference_time: 22958.0 + throughput: 43.557801202195314 estimated_peak_memory_range: - min: 115417088 - max: 115417088 + min: 100909056 + max: 100909056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jlpev6v05 + job_id: jwgoe3zqp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.413572Z' + timestamp: '2024-06-08T22:21:18Z' diff --git a/qai_hub_models/models/detr_resnet101/requirements.txt b/qai_hub_models/models/detr_resnet101/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet101/requirements.txt +++ b/qai_hub_models/models/detr_resnet101/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet101_dc5/README.md b/qai_hub_models/models/detr_resnet101_dc5/README.md index 272c64ab..e8c9e777 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/README.md +++ b/qai_hub_models/models/detr_resnet101_dc5/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet101-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet101_dc5/export.py b/qai_hub_models/models/detr_resnet101_dc5/export.py index cefe54f7..260f917d 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/export.py +++ b/qai_hub_models/models/detr_resnet101_dc5/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml index 06ea9ea9..6760c8c6 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet101_dc5/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet101-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 151967.0 - throughput: 6.580376002684793 + inference_time: 139662.0 + throughput: 7.160143775687016 estimated_peak_memory_range: - min: 1191936 - max: 4041416 + min: 1216512 + max: 4184536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jvgdv6vlg + job_id: j7gjkezv5 job_status: Passed torchscript_onnx_ort: - inference_time: 126534.0 - throughput: 7.903014209619549 + inference_time: 125062.0 + throughput: 7.996033967152292 estimated_peak_memory_range: - min: 2162688 - max: 314190936 + min: 2994176 + max: 315584184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: jqpyd844p + job_id: jz57vx0v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.439417Z' + timestamp: '2024-06-08T22:21:59Z' - torchscript_onnx_tflite: - inference_time: 107197.0 - throughput: 9.32861927106169 + inference_time: 106500.0 + throughput: 9.389671361502348 estimated_peak_memory_range: - min: 221184 - max: 493147472 + min: 991232 + max: 494886848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jz57dojr5 + job_id: jlpe4keo5 job_status: Passed torchscript_onnx_ort: - inference_time: 95203.0 - throughput: 10.503870676344233 + inference_time: 96040.0 + throughput: 10.412328196584756 estimated_peak_memory_range: - min: 4079616 - max: 168798432 + min: 4145152 + max: 167656240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j2p0ro1ep + job_id: 
jqp4jvk8p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.439619Z' + timestamp: '2024-06-08T22:22:00Z' - torchscript_onnx_tflite: - inference_time: 141441.0 - throughput: 7.0700857601402705 + inference_time: 139388.0 + throughput: 7.174218727580566 estimated_peak_memory_range: - min: 16384 - max: 4988056 + min: 1548288 + max: 4377008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 840 - job_id: jqp4wexlg + job_id: jygzvroop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.439721Z' + timestamp: '2024-06-08T22:21:54Z' - torchscript_onnx_ort: - inference_time: 125955.0 - throughput: 7.939343416299472 + inference_time: 124053.0 + throughput: 8.061070671406576 estimated_peak_memory_range: - min: 121176064 - max: 121176064 + min: 73572352 + max: 73572352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 856 - job_id: j1p87j385 + job_id: j0pxeyn35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.439826Z' + timestamp: '2024-06-08T22:22:01Z' diff --git a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet101_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet101_dc5/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet50/README.md b/qai_hub_models/models/detr_resnet50/README.md index e37e8210..df378aea 100644 --- a/qai_hub_models/models/detr_resnet50/README.md +++ b/qai_hub_models/models/detr_resnet50/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50/export.py b/qai_hub_models/models/detr_resnet50/export.py index f7bd5816..aae80346 100644 --- a/qai_hub_models/models/detr_resnet50/export.py +++ b/qai_hub_models/models/detr_resnet50/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50/perf.yaml b/qai_hub_models/models/detr_resnet50/perf.yaml index 974d50cd..fff85076 100644 --- a/qai_hub_models/models/detr_resnet50/perf.yaml +++ b/qai_hub_models/models/detr_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 20875.0 - throughput: 47.90419161676647 + inference_time: 21615.0 + throughput: 46.26416840157298 estimated_peak_memory_range: - min: 421888 - max: 3357136 + min: 2134016 + max: 5200288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: jw561o37p + job_id: jegnr3lk5 job_status: Passed torchscript_onnx_ort: - inference_time: 16647.0 - throughput: 60.070883642698384 + inference_time: 16643.0 + throughput: 60.08532115604158 estimated_peak_memory_range: - min: 708608 - max: 211147208 + min: 1540096 + max: 211446576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jz5w9ydjp + job_id: j1gle36jp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.465610Z' + timestamp: '2024-06-08T22:22:37Z' - torchscript_onnx_tflite: - inference_time: 14432.0 - throughput: 69.29046563192905 + inference_time: 15132.0 + throughput: 66.08511763150939 estimated_peak_memory_range: - min: 385024 - max: 232248816 + min: 36864 + max: 231347824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: j1p3mo4zg + job_id: jep23l0rg job_status: Passed torchscript_onnx_ort: - inference_time: 11774.0 - throughput: 84.93290300662477 + inference_time: 11694.0 + throughput: 85.51393877201984 estimated_peak_memory_range: - min: 4878336 - max: 101878160 + min: 2809856 + max: 97007056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jmg94o3v5 + job_id: jw56qne6g job_status: Passed reference_device_info: name: 
Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.465801Z' + timestamp: '2024-06-08T22:22:38Z' - torchscript_onnx_tflite: - inference_time: 20845.0 - throughput: 47.97313504437515 + inference_time: 21665.0 + throughput: 46.157396722824835 estimated_peak_memory_range: - min: 462848 - max: 4452912 + min: 438272 + max: 4576272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 771 - job_id: jwgovd1d5 + job_id: jqpyv6r8p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.465902Z' + timestamp: '2024-06-08T22:22:32Z' - torchscript_onnx_ort: - inference_time: 17028.0 - throughput: 58.72680291284942 + inference_time: 16944.0 + throughput: 59.01794145420208 estimated_peak_memory_range: - min: 40251392 - max: 40251392 + min: 116158464 + max: 116158464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jnp18odlg + job_id: j1p3qev35 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.465999Z' + timestamp: '2024-06-08T22:22:39Z' diff --git a/qai_hub_models/models/detr_resnet50/requirements.txt b/qai_hub_models/models/detr_resnet50/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet50/requirements.txt +++ b/qai_hub_models/models/detr_resnet50/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/detr_resnet50_dc5/README.md b/qai_hub_models/models/detr_resnet50_dc5/README.md index 39efe2a1..0e3471c6 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/README.md +++ b/qai_hub_models/models/detr_resnet50_dc5/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of DETR-ResNet50-DC5 can be found [here](https://github.com/facebookresearch/detr/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) diff --git a/qai_hub_models/models/detr_resnet50_dc5/export.py b/qai_hub_models/models/detr_resnet50_dc5/export.py index adfa7e7f..af83c17f 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/export.py +++ b/qai_hub_models/models/detr_resnet50_dc5/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -215,7 +215,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml index ee2eb6c6..b9e42be2 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/perf.yaml +++ b/qai_hub_models/models/detr_resnet50_dc5/perf.yaml @@ -36,11 +36,11 @@ models: - name: DETR-ResNet50-DC5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 134142.0 - throughput: 7.454786718551982 + inference_time: 133335.0 + throughput: 7.49990625117186 estimated_peak_memory_range: - min: 1200128 - max: 4600904 + min: 135168 + max: 3805824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: jz57dovr5 + job_id: jz5wmq33g job_status: Passed torchscript_onnx_ort: - inference_time: 119136.0 - throughput: 8.393768466290625 + inference_time: 117630.0 + throughput: 8.501232678738416 estimated_peak_memory_range: - min: 0 - max: 233208792 + min: 2134016 + max: 232241232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: jqpyd8v4p + job_id: jo5mv36d5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.491723Z' + timestamp: '2024-06-08T22:23:14Z' - torchscript_onnx_tflite: - inference_time: 101510.0 - throughput: 9.851246182642104 + inference_time: 102075.0 + throughput: 9.796718099436688 estimated_peak_memory_range: - min: 1228800 - max: 446135248 + min: 163840 + max: 444293712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: jqp4wejlg + job_id: jmg99wywg job_status: Passed torchscript_onnx_ort: - inference_time: 91238.0 - throughput: 10.960345470089218 + inference_time: 90172.0 + throughput: 11.089917047420485 estimated_peak_memory_range: - min: 4784128 - max: 148984528 + min: 6778880 + max: 152435808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j2p0roeep + job_id: jegnr3mk5 job_status: 
Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.491907Z' + timestamp: '2024-06-08T22:23:15Z' - torchscript_onnx_tflite: - inference_time: 134198.0 - throughput: 7.451675881905841 + inference_time: 132335.0 + throughput: 7.556579891940908 estimated_peak_memory_range: min: 1204224 - max: 4475272 + max: 4586176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 772 - job_id: j0px10e9g + job_id: jnp1qew8g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.492030Z' + timestamp: '2024-06-08T22:23:09Z' - torchscript_onnx_ort: - inference_time: 118988.0 - throughput: 8.404208827780952 + inference_time: 116939.0 + throughput: 8.551467004164564 estimated_peak_memory_range: - min: 43630592 - max: 43630592 + min: 22482944 + max: 22482944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 737 - job_id: j1p87jw85 + job_id: jopr1e20g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.492122Z' + timestamp: '2024-06-08T22:23:16Z' diff --git a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt index 546533cd..f43e4f4b 100644 --- a/qai_hub_models/models/detr_resnet50_dc5/requirements.txt +++ b/qai_hub_models/models/detr_resnet50_dc5/requirements.txt @@ -1,2 +1,2 @@ transformers==4.41.1 -timm==0.9.11 +timm==1.0.3 diff --git a/qai_hub_models/models/efficientnet_b0/README.md b/qai_hub_models/models/efficientnet_b0/README.md index 31379b53..56096dcb 100644 --- a/qai_hub_models/models/efficientnet_b0/README.md +++ b/qai_hub_models/models/efficientnet_b0/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of EfficientNet-B0 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) diff --git a/qai_hub_models/models/efficientnet_b0/evaluate.py b/qai_hub_models/models/efficientnet_b0/evaluate.py new file mode 100644 index 00000000..253f3004 --- /dev/null +++ b/qai_hub_models/models/efficientnet_b0/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.efficientnet_b0 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/efficientnet_b0/export.py b/qai_hub_models/models/efficientnet_b0/export.py index 7b0d5f3d..d4d7827c 100644 --- a/qai_hub_models/models/efficientnet_b0/export.py +++ b/qai_hub_models/models/efficientnet_b0/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/efficientnet_b0/info.yaml b/qai_hub_models/models/efficientnet_b0/info.yaml index 0e74436b..f02c5b91 100644 --- a/qai_hub_models/models/efficientnet_b0/info.yaml +++ b/qai_hub_models/models/efficientnet_b0/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/efficientnet_b0/perf.yaml b/qai_hub_models/models/efficientnet_b0/perf.yaml index 65190420..6383102d 100644 --- a/qai_hub_models/models/efficientnet_b0/perf.yaml +++ b/qai_hub_models/models/efficientnet_b0/perf.yaml @@ -36,11 +36,11 @@ models: - name: EfficientNet-B0 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1618.0 - throughput: 618.0469715698393 + inference_time: 1626.0 + throughput: 615.0061500615006 estimated_peak_memory_range: min: 16384 - max: 2344216 + max: 1985056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jn5q249m5 + job_id: jqpyv6j8p job_status: Passed torchscript_onnx_qnn: - inference_time: 1684.0 - throughput: 593.8242280285035 + inference_time: 1678.0 + throughput: 595.9475566150179 estimated_peak_memory_range: - min: 622592 - max: 88873088 + min: 16384 + max: 315561544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1p3moqzg + job_id: jogkr3qw5 job_status: Passed torchscript_onnx_ort: - inference_time: 1598.0 - throughput: 625.7822277847309 + inference_time: 1623.0 + throughput: 616.1429451632779 estimated_peak_memory_range: - min: 12288 - max: 80031016 + min: 16384 + max: 80982248 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jygz7zv6p + job_id: j1p3qe135 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.517590Z' + timestamp: '2024-06-08T22:23:45Z' - torchscript_onnx_tflite: - inference_time: 1139.0 - throughput: 877.9631255487269 + inference_time: 1142.0 + throughput: 875.6567425569177 estimated_peak_memory_range: min: 16384 - max: 71725056 + max: 72610976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1glkwelp + job_id: j2p0el295 job_status: Passed torchscript_onnx_qnn: - inference_time: 1177.0 - throughput: 849.6176720475786 + inference_time: 1186.0 + throughput: 843.1703204047218 estimated_peak_memory_range: min: 618496 - max: 72406400 + max: 72353488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: jwgovded5 + job_id: jn5q93rnp job_status: Passed torchscript_onnx_ort: - inference_time: 1134.0 - throughput: 881.8342151675485 + inference_time: 1173.0 + throughput: 852.5149190110827 estimated_peak_memory_range: - min: 0 - max: 33758960 + min: 618496 + max: 36882944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jz5w9ymjp + job_id: jwgoe3nqp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.517700Z' + timestamp: '2024-06-08T22:23:46Z' - torchscript_onnx_tflite: - inference_time: 1630.0 - throughput: 613.4969325153374 + inference_time: 1631.0 + throughput: 613.1207847946046 estimated_peak_memory_range: min: 16384 - max: 2786328 + max: 2841808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jw561oq7p + job_id: j1p8wzmkp job_status: Passed torchscript_onnx_qnn: - inference_time: 1680.0 - throughput: 595.2380952380952 + inference_time: 1683.0 + throughput: 594.1770647653001 estimated_peak_memory_range: - min: 634880 - max: 88349960 + min: 622592 + max: 88821056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j7gjl3k8p + job_id: jw56qnz6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.517774Z' + timestamp: '2024-06-08T22:23:44Z' - torchscript_onnx_qnn: - inference_time: 1830.0 - throughput: 546.448087431694 + inference_time: 1838.0 + throughput: 544.069640914037 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1310720 + max: 1310720 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 243 - job_id: j1pvw2zmg + job_id: j1gle32jp job_status: Passed torchscript_onnx_ort: inference_time: 1641.0 throughput: 609.3845216331505 estimated_peak_memory_range: - min: 41422848 - max: 41422848 + min: 32149504 + max: 32149504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 
+207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jmg94o9v5 + job_id: j1pvzvvkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.517848Z' + timestamp: '2024-06-08T22:23:47Z' diff --git a/qai_hub_models/models/esrgan/README.md b/qai_hub_models/models/esrgan/README.md index 4afc7424..7b22d043 100644 --- a/qai_hub_models/models/esrgan/README.md +++ b/qai_hub_models/models/esrgan/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ESRGAN can be found [here](https://github.com/xinntao/ESRGAN/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks](https://arxiv.org/abs/1809.00219) diff --git a/qai_hub_models/models/esrgan/export.py b/qai_hub_models/models/esrgan/export.py index 8603628c..47c6b95d 100644 --- a/qai_hub_models/models/esrgan/export.py +++ b/qai_hub_models/models/esrgan/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/esrgan/perf.yaml b/qai_hub_models/models/esrgan/perf.yaml index f84124f1..e35a93e5 100644 --- a/qai_hub_models/models/esrgan/perf.yaml +++ b/qai_hub_models/models/esrgan/perf.yaml @@ -36,11 +36,11 @@ models: - name: ESRGAN performance_metrics: - torchscript_onnx_tflite: - inference_time: 67687.0 - throughput: 14.773885679672611 + inference_time: 66520.0 + throughput: 15.033072760072159 estimated_peak_memory_range: - min: 3313664 - max: 7995200 + min: 4288512 + max: 7346848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jz5w9yx6p + job_id: jlpe4kko5 job_status: Passed torchscript_onnx_qnn: - inference_time: 66775.0 - throughput: 14.97566454511419 + inference_time: 67593.0 + throughput: 14.794431376030062 estimated_peak_memory_range: - min: 237568 - max: 105521768 + min: 73728 + max: 104762776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jvgdv60eg + job_id: jmg99wwwg job_status: Passed torchscript_onnx_ort: - inference_time: 70447.0 - throughput: 14.195068633156842 + inference_time: 68322.0 + throughput: 14.636573870788325 estimated_peak_memory_range: - min: 4218880 - max: 159787592 + min: 6356992 + max: 154422496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jo5mz94wp + job_id: jmg99ww8g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.552188Z' + timestamp: 
'2024-06-08T22:24:28Z' - torchscript_onnx_tflite: - inference_time: 53811.0 - throughput: 18.58356098195536 + inference_time: 56935.0 + throughput: 17.56388864494599 estimated_peak_memory_range: - min: 3256320 - max: 587536480 + min: 86016 + max: 583340176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jmg94o8l5 + job_id: jygzvrrop job_status: Passed torchscript_onnx_qnn: - inference_time: 50812.0 - throughput: 19.680390458946704 + inference_time: 50707.0 + throughput: 19.72114303745045 estimated_peak_memory_range: - min: 77824 - max: 256960720 + min: 73728 + max: 260404000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jz57do6l5 + job_id: jnp1qee8g job_status: Passed torchscript_onnx_ort: - inference_time: 53028.0 - throughput: 18.85796183148525 + inference_time: 51557.0 + throughput: 19.396008301491552 estimated_peak_memory_range: - min: 6365184 - max: 191210256 + min: 5955584 + max: 196150816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jegne1xrg + job_id: jnp1qee7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.552520Z' + timestamp: '2024-06-08T22:24:29Z' - torchscript_onnx_tflite: - inference_time: 75584.0 - throughput: 13.230313293818798 + inference_time: 65283.0 + throughput: 15.31792350229003 estimated_peak_memory_range: - min: 28672 - max: 5061616 + min: 1536000 + max: 4290816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1024 - job_id: jnp18o32g + job_id: jz5wmqq3g job_status: Passed torchscript_onnx_qnn: - inference_time: 64917.0 - throughput: 15.40428547221837 + inference_time: 65436.0 + throughput: 15.282107708295127 estimated_peak_memory_range: - min: 110592 - max: 58149984 + min: 2744320 + max: 60284768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: j0px10m1g + job_id: jz5wmqqmg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.552756Z' + timestamp: '2024-06-08T22:24:27Z' - torchscript_onnx_qnn: - inference_time: 73244.0 - throughput: 13.652995467205505 + inference_time: 73135.0 + throughput: 13.67334381623026 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1026 - job_id: jqp4we8vg + job_id: jvgd7oorg job_status: Passed torchscript_onnx_ort: - inference_time: 65794.0 - throughput: 15.198954311943337 + inference_time: 65785.0 + throughput: 15.20103367028958 estimated_peak_memory_range: - min: 1208320 - max: 1208320 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jopryx99g + job_id: jvgd7oozg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm 
chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.552990Z' + timestamp: '2024-06-08T22:24:30Z' diff --git a/qai_hub_models/models/facebook_denoiser/README.md b/qai_hub_models/models/facebook_denoiser/README.md index eaa05e87..d4302a62 100644 --- a/qai_hub_models/models/facebook_denoiser/README.md +++ b/qai_hub_models/models/facebook_denoiser/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Facebook-Denoiser can be found [here](https://github.com/facebookresearch/denoiser/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real Time Speech Enhancement in the Waveform Domain](https://arxiv.org/abs/2006.12847) diff --git a/qai_hub_models/models/facebook_denoiser/export.py b/qai_hub_models/models/facebook_denoiser/export.py index b03be595..11ecd12d 100644 --- a/qai_hub_models/models/facebook_denoiser/export.py +++ b/qai_hub_models/models/facebook_denoiser/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -201,7 +201,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/facebook_denoiser/perf.yaml b/qai_hub_models/models/facebook_denoiser/perf.yaml index 3fbd9376..912fe7c1 100644 --- a/qai_hub_models/models/facebook_denoiser/perf.yaml +++ b/qai_hub_models/models/facebook_denoiser/perf.yaml @@ -36,26 +36,26 @@ models: - name: Facebook-Denoiser performance_metrics: - torchscript_onnx_tflite: - inference_time: 736952.0 - throughput: 1.356940479162822 + inference_time: 762754.0 + throughput: 1.3110386835073955 estimated_peak_memory_range: - min: 92352512 - max: 464837840 + min: 271872000 + max: 745165216 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: jqpyd8n7p + layers_on_cpu: 205 + total_layers: 205 + job_id: jqp4jvv1p job_status: Passed torchscript_onnx_ort: - inference_time: 14540395.0 - throughput: 0.06877392257913213 + inference_time: 14425872.0 + throughput: 0.06931989969133236 estimated_peak_memory_range: - min: 847872 - max: 89968496 + min: 73728 + max: 97772968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jogky6d2p + job_id: jegnr33q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +72,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.586858Z' + timestamp: '2024-06-08T22:24:56Z' - torchscript_onnx_tflite: - inference_time: 777305.0 - throughput: 1.2864962916744394 + inference_time: 700116.0 + throughput: 1.4283347331013718 
estimated_peak_memory_range: - min: 366411776 - max: 386789680 + min: 418246656 + max: 442262688 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: j2p0rok6p + layers_on_cpu: 205 + total_layers: 205 + job_id: j0pxeyyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 10689541.0 - throughput: 0.09354938626457394 + inference_time: 10632015.0 + throughput: 0.09405554826625057 estimated_peak_memory_range: - min: 19763200 - max: 227250656 + min: 16744448 + max: 226752096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: jn5q24w45 + job_id: jopr1ee7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,21 +110,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.586927Z' + timestamp: '2024-06-08T22:24:57Z' - torchscript_onnx_tflite: - inference_time: 756067.0 - throughput: 1.3226341051785093 + inference_time: 733772.0 + throughput: 1.3628211488037156 estimated_peak_memory_range: - min: 456364032 - max: 459426960 + min: 89939968 + max: 463947896 primary_compute_unit: CPU precision: fp32 layer_info: layers_on_npu: 0 layers_on_gpu: 0 - layers_on_cpu: 209 - total_layers: 209 - job_id: j1p87j8x5 + layers_on_cpu: 205 + total_layers: 205 + job_id: jo5mv3395 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,10 +133,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.586964Z' + timestamp: '2024-06-08T22:24:54Z' - torchscript_onnx_ort: - inference_time: 16116345.0 - throughput: 0.06204880821302845 + inference_time: 15555145.0 + throughput: 0.06428741101416927 estimated_peak_memory_range: min: 450560 max: 450560 @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 178 - job_id: j1glkw78p + job_id: jep23llqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.586998Z' + timestamp: '2024-06-08T22:24:58Z' diff --git a/qai_hub_models/models/fastsam_s/README.md b/qai_hub_models/models/fastsam_s/README.md index 717a36e8..516dc401 100644 --- a/qai_hub_models/models/fastsam_s/README.md +++ b/qai_hub_models/models/fastsam_s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-S can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_s/export.py b/qai_hub_models/models/fastsam_s/export.py index 2b188980..aad089b0 100644 --- a/qai_hub_models/models/fastsam_s/export.py +++ b/qai_hub_models/models/fastsam_s/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_s/perf.yaml b/qai_hub_models/models/fastsam_s/perf.yaml index 8fc1e10c..708e66d8 100644 --- a/qai_hub_models/models/fastsam_s/perf.yaml +++ b/qai_hub_models/models/fastsam_s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FastSam-S performance_metrics: - torchscript_onnx_tflite: - inference_time: 8641.0 - throughput: 115.72734637194769 + inference_time: 8700.0 + throughput: 114.94252873563218 estimated_peak_memory_range: - min: 8404992 - max: 11000944 + min: 8429568 + max: 39456112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: j1p3mo8lg + job_id: j2p0elln5 job_status: Passed torchscript_onnx_ort: - inference_time: 10946.0 - throughput: 91.35757354284671 + inference_time: 10893.0 + throughput: 91.80207472688883 estimated_peak_memory_range: - min: 19734528 - max: 75024696 + min: 26902528 + max: 83130600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jmg94okl5 + job_id: jwgoe3vkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.613022Z' + timestamp: '2024-06-08T22:25:27Z' - torchscript_onnx_tflite: - inference_time: 6423.0 - throughput: 155.6904873112253 + inference_time: 6426.0 + throughput: 155.6178026766262 estimated_peak_memory_range: - min: 6889472 - max: 77082752 + min: 6594560 + max: 79404896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: jwgovdmx5 + job_id: j1p8wzzop job_status: Passed torchscript_onnx_ort: - inference_time: 8057.0 - throughput: 124.11567580985478 + inference_time: 7507.0 + throughput: 133.20900492873318 estimated_peak_memory_range: - min: 24772608 - max: 65740448 + min: 27897856 + max: 69661040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jnp18o72g + job_id: j1pvzvwrg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 3 - timestamp: '2024-05-29T18:59:40.613103Z' + timestamp: '2024-06-08T22:25:28Z' - torchscript_onnx_tflite: - inference_time: 8777.0 - throughput: 113.93414606357526 + inference_time: 8693.0 + throughput: 115.03508570113885 estimated_peak_memory_range: - min: 7831552 - max: 10988912 + min: 3923968 + max: 21721296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 288 - job_id: j1pvw24jg + job_id: jogkr33n5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.613145Z' + timestamp: '2024-06-08T22:25:21Z' - torchscript_onnx_ort: - inference_time: 10792.0 - throughput: 92.66123054114159 + inference_time: 10798.0 + throughput: 92.60974254491572 estimated_peak_memory_range: - min: 55435264 - max: 55435264 + min: 72966144 + max: 72966144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jvgdv68eg + job_id: j7gjkele5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.613189Z' + timestamp: '2024-06-08T22:25:29Z' diff --git a/qai_hub_models/models/fastsam_x/README.md b/qai_hub_models/models/fastsam_x/README.md index b3c84891..0c34311d 100644 --- a/qai_hub_models/models/fastsam_x/README.md +++ b/qai_hub_models/models/fastsam_x/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FastSam-X can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CASIA-IVA-Lab/FastSAM/blob/main/LICENSE) ## References * [Fast Segment Anything](https://arxiv.org/abs/2306.12156) diff --git a/qai_hub_models/models/fastsam_x/export.py b/qai_hub_models/models/fastsam_x/export.py index 32977477..fb2a5872 100644 --- a/qai_hub_models/models/fastsam_x/export.py +++ b/qai_hub_models/models/fastsam_x/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -227,7 +227,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/fastsam_x/perf.yaml b/qai_hub_models/models/fastsam_x/perf.yaml index eb05cebb..51138c0a 100644 --- a/qai_hub_models/models/fastsam_x/perf.yaml +++ b/qai_hub_models/models/fastsam_x/perf.yaml @@ -36,11 +36,11 @@ models: - name: FastSam-X performance_metrics: - torchscript_onnx_tflite: - inference_time: 50032.0 - throughput: 19.987208186760473 + inference_time: 53656.0 + throughput: 18.637244669748025 estimated_peak_memory_range: min: 9220096 - max: 14175736 + max: 14211840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jqp4wemvg + job_id: jygzvr7xp job_status: Passed torchscript_onnx_ort: - inference_time: 50303.0 - throughput: 19.879530047909668 + inference_time: 51625.0 + throughput: 19.37046004842615 estimated_peak_memory_range: - min: 26087424 - max: 347775896 + min: 25325568 + max: 343683192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j2p0rod6p + job_id: j0pxey1l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.639222Z' + timestamp: '2024-06-08T22:26:04Z' - torchscript_onnx_tflite: - inference_time: 36166.0 - throughput: 27.650279267820604 + inference_time: 36229.0 + throughput: 27.602197134891938 estimated_peak_memory_range: - min: 7733248 - max: 142126784 + min: 8450048 + max: 144127216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: j0px1031g + job_id: jz5wmq9mg job_status: Passed torchscript_onnx_ort: - inference_time: 36822.0 - throughput: 27.1576774754223 + inference_time: 37119.0 + throughput: 26.94038093698645 estimated_peak_memory_range: - min: 28086272 - max: 92380224 + min: 29941760 + max: 95002704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: j1p87j6x5 + job_id: jo5mv3z95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - 
timestamp: '2024-05-29T18:59:40.639336Z' + timestamp: '2024-06-08T22:26:05Z' - torchscript_onnx_tflite: - inference_time: 52994.0 - throughput: 18.870060761595653 + inference_time: 49800.0 + throughput: 20.080321285140563 estimated_peak_memory_range: - min: 3325952 - max: 7764344 + min: 9379840 + max: 47006488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 420 - job_id: jo5mz9owp + job_id: jmg99w48g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.639421Z' + timestamp: '2024-06-08T22:25:58Z' - torchscript_onnx_ort: - inference_time: 49386.0 - throughput: 20.248653464544606 + inference_time: 49559.0 + throughput: 20.177969692689523 estimated_peak_memory_range: - min: 24240128 - max: 24240128 + min: 30785536 + max: 30785536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 421 - job_id: jogky6o2p + job_id: jegnr3eq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.639483Z' + timestamp: '2024-06-08T22:26:06Z' diff --git a/qai_hub_models/models/fcn_resnet50/README.md b/qai_hub_models/models/fcn_resnet50/README.md index c2af6df1..dba323b0 100644 --- a/qai_hub_models/models/fcn_resnet50/README.md +++ b/qai_hub_models/models/fcn_resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FCN-ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) diff --git a/qai_hub_models/models/fcn_resnet50/export.py b/qai_hub_models/models/fcn_resnet50/export.py index 4c3eb67f..cc121898 100644 --- a/qai_hub_models/models/fcn_resnet50/export.py +++ b/qai_hub_models/models/fcn_resnet50/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/fcn_resnet50/info.yaml b/qai_hub_models/models/fcn_resnet50/info.yaml index fcaeaafa..eb1d3a92 100644 --- a/qai_hub_models/models/fcn_resnet50/info.yaml +++ b/qai_hub_models/models/fcn_resnet50/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - coco +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/fcn_resnet50/perf.yaml b/qai_hub_models/models/fcn_resnet50/perf.yaml index d6b15006..8d3c9539 100644 --- a/qai_hub_models/models/fcn_resnet50/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: FCN-ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 42516.0 - throughput: 23.520556966788973 + inference_time: 41432.0 + throughput: 24.135933577910794 estimated_peak_memory_range: - min: 22130688 - max: 24822256 + min: 22097920 + max: 25129176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1glkwo8p + job_id: jep23lmqg job_status: Passed torchscript_onnx_qnn: - inference_time: 42169.0 - throughput: 23.714102776921436 + inference_time: 42249.0 + throughput: 23.669199270988663 estimated_peak_memory_range: - min: 3162112 - max: 21014016 + min: 3497984 + max: 21232048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jwgovdox5 + job_id: j1p8wz7op job_status: Passed torchscript_onnx_ort: - inference_time: 43060.0 - throughput: 23.22340919647004 + inference_time: 43347.0 + throughput: 23.069647265093316 estimated_peak_memory_range: - min: 44617728 - max: 204107384 + min: 44056576 + max: 204120472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jygz7z8kp + job_id: jw56qn3yg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.665232Z' + timestamp: '2024-06-08T22:26:38Z' - torchscript_onnx_tflite: - inference_time: 30738.0 - throughput: 32.533021016331574 + inference_time: 31357.0 + throughput: 31.890805880664605 estimated_peak_memory_range: - min: 21647360 - max: 155527440 + min: 49152 + max: 137281408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - 
job_id: jw561or0p + job_id: jqpyv6dlp job_status: Passed torchscript_onnx_qnn: - inference_time: 31677.0 - throughput: 31.56864602077217 + inference_time: 31599.0 + throughput: 31.64657109402196 estimated_peak_memory_range: - min: 2584576 - max: 80802912 + min: 3162112 + max: 80794592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j1pvw2ejg + job_id: jogkr3yn5 job_status: Passed torchscript_onnx_ort: - inference_time: 32035.0 - throughput: 31.21585765568909 + inference_time: 32324.0 + throughput: 30.936765251825268 estimated_peak_memory_range: - min: 47235072 - max: 116713856 + min: 43311104 + max: 107423312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jz5w9y16p + job_id: j1p3qe4n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.665299Z' + timestamp: '2024-06-08T22:26:39Z' - torchscript_onnx_tflite: - inference_time: 42133.0 - throughput: 23.734364987064772 + inference_time: 41734.0 + throughput: 23.9612785738247 estimated_peak_memory_range: - min: 12288 - max: 30781808 + min: 22106112 + max: 24857096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: j1p3moxlg + job_id: j2p0elrn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 42154.0 - throughput: 23.72254115860891 + inference_time: 42169.0 + throughput: 23.714102776921436 estimated_peak_memory_range: - min: 3178496 - max: 21842136 + min: 3166208 + max: 19865232 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: jlpev6815 + job_id: j1gle30mp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.665342Z' + timestamp: '2024-06-08T22:26:37Z' - torchscript_onnx_qnn: - inference_time: 68527.0 - throughput: 14.59278824404979 + inference_time: 70340.0 + throughput: 14.216661927779358 estimated_peak_memory_range: min: 3153920 max: 3153920 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 127 - job_id: j7gjl3oxp + job_id: jn5q932op job_status: Passed torchscript_onnx_ort: - inference_time: 42361.0 - throughput: 23.606619296050614 + inference_time: 42281.0 + throughput: 23.651285447364064 estimated_peak_memory_range: - min: 15384576 - max: 15384576 + min: 9379840 + max: 9379840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jmg94oxl5 + job_id: jwgoe31kp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.665389Z' + timestamp: '2024-06-08T22:26:40Z' diff --git a/qai_hub_models/models/fcn_resnet50_quantized/README.md b/qai_hub_models/models/fcn_resnet50_quantized/README.md index e74a2898..3ed8a452 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/README.md +++ b/qai_hub_models/models/fcn_resnet50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of FCN-ResNet50-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/export.py b/qai_hub_models/models/fcn_resnet50_quantized/export.py index 943f9fe5..646bdaab 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/export.py +++ b/qai_hub_models/models/fcn_resnet50_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/fcn_resnet50_quantized/info.yaml b/qai_hub_models/models/fcn_resnet50_quantized/info.yaml index 21939860..19b875e5 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/info.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/info.yaml @@ -39,3 +39,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - coco +labels_file: voc_labels.txt diff --git a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml index acff4279..83867788 100644 --- a/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml +++ b/qai_hub_models/models/fcn_resnet50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FCN-ResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 14122.0 - throughput: 70.8114997875655 + inference_time: 14137.0 + throughput: 70.73636556553724 estimated_peak_memory_range: - min: 6492160 - max: 8334248 + min: 7475200 + max: 59586696 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jqp4we9vg + job_id: j7gjke0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 15195.0 - throughput: 65.81112207963146 + inference_time: 15266.0 + throughput: 65.5050438883794 estimated_peak_memory_range: - min: 16384 - max: 135971112 + min: 839680 + max: 9922576 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jep2mov45 + job_id: jnp1qed7g job_status: Passed torchscript_onnx_ort: - inference_time: 18653.0 - throughput: 53.61067924730607 + inference_time: 12789.0 + throughput: 78.19219641879741 estimated_peak_memory_range: - min: 44085248 - max: 93745064 + min: 9297920 + max: 58295544 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: jn5q24m45 + total_layers: 80 + job_id: j0pxey7l5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.699656Z' + timestamp: '2024-06-08T22:28:08Z' - torchscript_onnx_tflite: - inference_time: 10017.0 - throughput: 
99.83028850953379 + inference_time: 10012.0 + throughput: 99.88014382740711 estimated_peak_memory_range: - min: 45056 - max: 82919904 + min: 73728 + max: 83075216 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: j0px10d1g + job_id: jlpe4krv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 11194.0 - throughput: 89.33357155619082 + inference_time: 11234.0 + throughput: 89.01548869503294 estimated_peak_memory_range: - min: 1011712 - max: 55512672 + min: 802816 + max: 55488784 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jqpyd877p + job_id: jvgd7orzg job_status: Passed torchscript_onnx_ort: - inference_time: 14507.0 - throughput: 68.93223960846488 + inference_time: 9614.0 + throughput: 104.01497815685458 estimated_peak_memory_range: - min: 51236864 - max: 96653104 + min: 11309056 + max: 56165696 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: j1glkw18p + total_layers: 80 + job_id: jegnr39q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.699713Z' + timestamp: '2024-06-08T22:28:09Z' - torchscript_onnx_tflite: - inference_time: 14106.0 - throughput: 70.8918190840777 + inference_time: 14165.0 + throughput: 70.5965407695023 estimated_peak_memory_range: - min: 5558272 - max: 7622896 + min: 5574656 + max: 14323152 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jo5mz9dwp + job_id: jygzvrxxp job_status: Passed torchscript_onnx_qnn: - inference_time: 15191.0 - throughput: 65.82845105654664 + inference_time: 15225.0 + throughput: 65.68144499178982 estimated_peak_memory_range: - min: 36864 - max: 15082688 + min: 811008 + max: 30220216 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p87j4x5 + job_id: jqp4jvx1p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.699752Z' + timestamp: '2024-06-08T22:28:07Z' - torchscript_onnx_tflite: - inference_time: 89233.0 - throughput: 11.20661638631448 + inference_time: 89203.0 + throughput: 11.210385300942793 estimated_peak_memory_range: - min: 5718016 - max: 90674400 + min: 6000640 + max: 92646944 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 87 - job_id: jegne17rg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 90925.0 - throughput: 10.998075336816058 - estimated_peak_memory_range: - min: 929792 - max: 84664912 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 79 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 79 - job_id: jogky692p + job_id: jz5wmqdmg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.699790Z' + timestamp: '2024-06-08T22:28:02Z' - torchscript_onnx_tflite: - inference_time: 752252.0 - throughput: 
1.3293417631325672 + inference_time: 728106.0 + throughput: 1.373426396705974 estimated_peak_memory_range: - min: 65630208 - max: 185474512 + min: 33034240 + max: 70768096 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 12 total_layers: 87 - job_id: jopryxn9g + job_id: jmg99w38g job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.699812Z' + timestamp: '2024-06-08T22:28:03Z' - torchscript_onnx_qnn: - inference_time: 16847.0 - throughput: 59.357749154152074 + inference_time: 16789.0 + throughput: 59.562808982071594 estimated_peak_memory_range: - min: 786432 - max: 786432 + min: 794624 + max: 794624 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j2p0rov6p + job_id: jz57vxj95 job_status: Passed torchscript_onnx_ort: - inference_time: 17508.0 - throughput: 57.11674663011195 + inference_time: 12535.0 + throughput: 79.77662544874352 estimated_peak_memory_range: - min: 69443584 - max: 69443584 + min: 835584 + max: 835584 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 82 + layers_on_npu: 80 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 82 - job_id: jw561od0p + total_layers: 80 + job_id: jopr1e47g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.699852Z' + timestamp: '2024-06-08T22:28:10Z' diff --git a/qai_hub_models/models/ffnet_122ns_lowres/README.md b/qai_hub_models/models/ffnet_122ns_lowres/README.md index 6d34b21a..f6d57fa7 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/README.md +++ b/qai_hub_models/models/ffnet_122ns_lowres/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-122NS-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/export.py b/qai_hub_models/models/ffnet_122ns_lowres/export.py index 53651a1f..98404700 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/export.py +++ b/qai_hub_models/models/ffnet_122ns_lowres/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml index a40f6ccd..8ae3d60b 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml index 4c3cbe64..996c082b 100644 --- a/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_122ns_lowres/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-122NS-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10505.0 - throughput: 95.19276534983341 + inference_time: 9538.0 + throughput: 104.84378276368211 estimated_peak_memory_range: - min: 647168 - max: 3107656 + min: 0 + max: 1882960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j1pvw29jg + job_id: j1p8wz3op job_status: Passed torchscript_onnx_qnn: - inference_time: 10881.0 - throughput: 91.90331770976933 + inference_time: 10684.0 + throughput: 93.59790340696368 estimated_peak_memory_range: - min: 6311936 - max: 30292720 + min: 7036928 + max: 23266984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jygz7z4kp + job_id: j1gle3emp job_status: Passed torchscript_onnx_ort: - inference_time: 7968.0 - throughput: 125.50200803212851 + inference_time: 7933.0 + throughput: 126.05571662674902 estimated_peak_memory_range: - min: 1523712 - max: 135656304 + min: 1155072 + max: 141586240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jvgdv62eg + job_id: j1pvzvzrg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.743297Z' + timestamp: '2024-06-08T22:28:42Z' - torchscript_onnx_tflite: - inference_time: 7344.0 - throughput: 136.16557734204792 + inference_time: 6833.0 + throughput: 146.34860237084735 estimated_peak_memory_range: - min: 667648 - max: 60722624 + min: 659456 + max: 61929920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 
+101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: j7gjl3wxp + job_id: jogkr3ln5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7564.0 - throughput: 132.20518244315178 + inference_time: 7606.0 + throughput: 131.47515119642387 estimated_peak_memory_range: min: 6307840 - max: 88725056 + max: 93102864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jz5w9y46p + job_id: jw56qnqyg job_status: Passed torchscript_onnx_ort: - inference_time: 5884.0 - throughput: 169.9524133242692 + inference_time: 5594.0 + throughput: 178.7629603146228 estimated_peak_memory_range: - min: 8749056 - max: 63861488 + min: 6307840 + max: 59711872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: jz57do9l5 + job_id: j7gjkeke5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.743426Z' + timestamp: '2024-06-08T22:28:43Z' - torchscript_onnx_tflite: - inference_time: 10684.0 - throughput: 93.59790340696368 + inference_time: 9545.0 + throughput: 104.76689366160294 estimated_peak_memory_range: - min: 684032 - max: 10103760 + min: 0 + max: 2096664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 216 - job_id: jlpev6l15 + job_id: jn5q937op job_status: Passed torchscript_onnx_qnn: - inference_time: 10891.0 - throughput: 91.81893306399779 + inference_time: 10716.0 + throughput: 93.3184023889511 estimated_peak_memory_range: - min: 6307840 - max: 38441144 + min: 6311936 + max: 40648480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jnp18o62g + job_id: jwgoe3ekp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.743508Z' + timestamp: '2024-06-08T22:28:41Z' - torchscript_onnx_qnn: - inference_time: 17476.0 - throughput: 57.221332112611584 + inference_time: 17375.0 + throughput: 57.55395683453237 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 348 - job_id: jmg94odl5 + job_id: j1p3qeqn5 job_status: Passed torchscript_onnx_ort: - inference_time: 7566.0 - throughput: 132.17023526301878 + inference_time: 7523.0 + throughput: 132.92569453675395 estimated_peak_memory_range: - min: 9342976 - max: 9342976 + min: 6332416 + max: 6332416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 350 - job_id: j0px10x1g + job_id: jlpe4k4v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.743602Z' + timestamp: '2024-06-08T22:28:44Z' diff --git a/qai_hub_models/models/ffnet_40s/README.md b/qai_hub_models/models/ffnet_40s/README.md index f1911ec4..0bc90d39 100644 --- a/qai_hub_models/models/ffnet_40s/README.md +++ b/qai_hub_models/models/ffnet_40s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of FFNet-40S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s/export.py b/qai_hub_models/models/ffnet_40s/export.py index 31513e00..879b2dbd 100644 --- a/qai_hub_models/models/ffnet_40s/export.py +++ b/qai_hub_models/models/ffnet_40s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_40s/info.yaml b/qai_hub_models/models/ffnet_40s/info.yaml index 967b8b32..01f9323a 100644 --- a/qai_hub_models/models/ffnet_40s/info.yaml +++ b/qai_hub_models/models/ffnet_40s/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_40s/perf.yaml b/qai_hub_models/models/ffnet_40s/perf.yaml index 5e012497..2da118a9 100644 --- a/qai_hub_models/models/ffnet_40s/perf.yaml +++ b/qai_hub_models/models/ffnet_40s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-40S performance_metrics: - torchscript_onnx_tflite: - inference_time: 23135.0 - throughput: 43.22455154527772 + inference_time: 23193.0 + throughput: 43.11645755184754 estimated_peak_memory_range: - min: 2457600 - max: 4282696 + min: 2531328 + max: 4441664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jopryxw9g + job_id: jz5wmqmmg job_status: Passed torchscript_onnx_qnn: - inference_time: 17200.0 - throughput: 58.13953488372093 + inference_time: 17411.0 + throughput: 57.43495491356039 estimated_peak_memory_range: - min: 23400448 - max: 42153440 + min: 25214976 + max: 45407080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j2p0ro66p + job_id: jvgd7o7zg job_status: Passed torchscript_onnx_ort: - inference_time: 27788.0 - throughput: 35.98675687347056 + inference_time: 27393.0 + throughput: 36.50567663271639 estimated_peak_memory_range: - min: 31006720 - max: 110905224 + min: 34656256 + max: 113886552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j1glkwl8p + job_id: jo5mv3v95 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.778232Z' + timestamp: '2024-06-08T22:29:14Z' - torchscript_onnx_tflite: - inference_time: 16624.0 - throughput: 60.15399422521656 + inference_time: 16820.0 + throughput: 59.45303210463734 estimated_peak_memory_range: - min: 40960 - max: 97875920 + min: 757760 + max: 102036720 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jep2moe45 + job_id: jmg99w98g job_status: Passed torchscript_onnx_qnn: - inference_time: 12551.0 - throughput: 79.67492630069317 + inference_time: 12560.0 + throughput: 79.61783439490446 estimated_peak_memory_range: - min: 25219072 - max: 85826080 + min: 132333568 + max: 190814608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: j1p87j1x5 + job_id: jz57vxv95 job_status: Passed torchscript_onnx_ort: - inference_time: 19852.0 - throughput: 50.372758412250654 + inference_time: 19832.0 + throughput: 50.42355788624445 estimated_peak_memory_range: - min: 32559104 - max: 73485856 + min: 29405184 + max: 74127520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: jw561ow0p + job_id: jegnr3rq5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.778307Z' + timestamp: '2024-06-08T22:29:15Z' - torchscript_onnx_tflite: - inference_time: 22986.0 - throughput: 43.50474201687984 + inference_time: 23566.0 + throughput: 42.43401510650938 estimated_peak_memory_range: - min: 2535424 - max: 33407872 + min: 2564096 + max: 4836528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 92 - job_id: jqpyd8m7p + job_id: jnp1qeq7g job_status: Passed torchscript_onnx_qnn: - inference_time: 17314.0 - throughput: 57.75672865888876 + inference_time: 17310.0 + throughput: 57.77007510109763 estimated_peak_memory_range: - min: 24948736 - max: 47341064 + min: 25202688 + max: 45281048 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jn5q24v45 + job_id: j0pxeyel5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.778354Z' + timestamp: '2024-06-08T22:29:13Z' - torchscript_onnx_qnn: - inference_time: 23238.0 - throughput: 43.03296324984939 + inference_time: 23356.0 + throughput: 42.81555060798082 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jogky682p + job_id: jqp4jvj1p job_status: Passed torchscript_onnx_ort: - inference_time: 26282.0 - throughput: 38.048854729472644 + inference_time: 26356.0 + throughput: 37.942024586431934 estimated_peak_memory_range: - min: 25227264 - max: 25227264 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 142 - job_id: j1p3mo6lg + job_id: jopr1e17g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.778407Z' + timestamp: '2024-06-08T22:29:16Z' diff --git a/qai_hub_models/models/ffnet_40s_quantized/README.md b/qai_hub_models/models/ffnet_40s_quantized/README.md index 7767cf30..b730ceb5 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/README.md +++ 
b/qai_hub_models/models/ffnet_40s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-40S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_40s_quantized/export.py b/qai_hub_models/models/ffnet_40s_quantized/export.py index 91d168bb..ad846a21 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/export.py +++ b/qai_hub_models/models/ffnet_40s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_40s_quantized/info.yaml b/qai_hub_models/models/ffnet_40s_quantized/info.yaml index 163abd5d..4c169ebc 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml index 4413dde3..c8a1dcdd 100644 --- a/qai_hub_models/models/ffnet_40s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_40s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-40S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6426.0 - throughput: 155.6178026766262 + inference_time: 6442.0 + throughput: 155.2312946289972 estimated_peak_memory_range: - min: 2113536 - max: 4414960 + min: 36864 + max: 1593576 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j1pvw27jg + job_id: jqpyv6vlp job_status: Passed torchscript_onnx_ort: - inference_time: 11412.0 - throughput: 87.62705923589205 + inference_time: 9268.0 + throughput: 107.89814415192059 estimated_peak_memory_range: - min: 27074560 - max: 55662472 + min: 7577600 + max: 25025832 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jnp18oj8g + total_layers: 92 + job_id: j1pvzv4rg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.815172Z' + timestamp: '2024-06-08T22:30:02Z' - torchscript_onnx_tflite: - inference_time: 4740.0 - 
throughput: 210.9704641350211 + inference_time: 4682.0 + throughput: 213.58393848782572 estimated_peak_memory_range: - min: 16384 - max: 66762720 + min: 12288 + max: 67067712 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: j7gjl3qxp + job_id: j2p0elen5 job_status: Passed torchscript_onnx_ort: - inference_time: 8772.0 - throughput: 113.99908800729594 + inference_time: 7185.0 + throughput: 139.17884481558804 estimated_peak_memory_range: - min: 32075776 - max: 65683792 + min: 6955008 + max: 47776688 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jvgdv63rg + total_layers: 92 + job_id: j7gjke1e5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.815222Z' + timestamp: '2024-06-08T22:30:03Z' - torchscript_onnx_tflite: - inference_time: 6448.0 - throughput: 155.08684863523573 + inference_time: 6401.0 + throughput: 156.22558975160132 estimated_peak_memory_range: min: 651264 - max: 2142088 + max: 2179136 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jlpev6y15 + job_id: j1p8wzwop job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.815247Z' + timestamp: '2024-06-08T22:29:54Z' - torchscript_onnx_tflite: - inference_time: 34910.0 - throughput: 28.64508736751647 + inference_time: 35462.0 + throughput: 28.199199142744344 estimated_peak_memory_range: - min: 131072 - max: 37241984 + min: 163840 + max: 38805968 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jygz7znkp + job_id: jogkr3rn5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.815272Z' + timestamp: '2024-06-08T22:29:55Z' - torchscript_onnx_tflite: - inference_time: 189525.0 - throughput: 5.276348766653475 + inference_time: 189203.0 + throughput: 5.285328456736944 estimated_peak_memory_range: - min: 827392 - max: 2998544 + min: 835584 + max: 9440536 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 97 - job_id: jz5w9y76p + job_id: jn5q939op job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.815296Z' + timestamp: '2024-06-08T22:29:56Z' - torchscript_onnx_ort: - inference_time: 10833.0 - throughput: 92.31053263177328 + inference_time: 8436.0 + throughput: 118.53959222380276 estimated_peak_memory_range: - min: 25223168 - max: 25223168 + min: 23719936 + max: 23719936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 92 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jz57do4v5 + total_layers: 92 + job_id: jlpe4k2v5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: 
'2024-05-29T18:59:40.815322Z' + timestamp: '2024-06-08T22:30:04Z' diff --git a/qai_hub_models/models/ffnet_54s/README.md b/qai_hub_models/models/ffnet_54s/README.md index 6aea8fe7..4122507a 100644 --- a/qai_hub_models/models/ffnet_54s/README.md +++ b/qai_hub_models/models/ffnet_54s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s/export.py b/qai_hub_models/models/ffnet_54s/export.py index 78f844ff..a573a9f5 100644 --- a/qai_hub_models/models/ffnet_54s/export.py +++ b/qai_hub_models/models/ffnet_54s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_54s/info.yaml b/qai_hub_models/models/ffnet_54s/info.yaml index 846f0dd0..c40cad2c 100644 --- a/qai_hub_models/models/ffnet_54s/info.yaml +++ b/qai_hub_models/models/ffnet_54s/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_54s/perf.yaml b/qai_hub_models/models/ffnet_54s/perf.yaml index 2c8c14fb..e912f5b5 100644 --- a/qai_hub_models/models/ffnet_54s/perf.yaml +++ b/qai_hub_models/models/ffnet_54s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-54S performance_metrics: - torchscript_onnx_tflite: - inference_time: 25448.0 - throughput: 39.295818924866396 + inference_time: 25403.0 + throughput: 39.365429280006296 estimated_peak_memory_range: - min: 2547712 - max: 5357880 + min: 4255744 + max: 6909008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: j0px1043g + job_id: jz5wmqxmg job_status: Passed torchscript_onnx_qnn: - inference_time: 19884.0 - throughput: 50.291691812512575 + inference_time: 20253.0 + throughput: 49.37540117513455 estimated_peak_memory_range: - min: 25227264 - max: 49363400 + min: 25219072 + max: 49749016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jopryx00g + job_id: jvgd7o0zg job_status: Passed torchscript_onnx_ort: - inference_time: 30303.0 - throughput: 33.000033000033 + inference_time: 30396.0 + throughput: 32.89906566653507 estimated_peak_memory_range: - min: 30199808 - max: 136386576 + min: 25182208 + max: 90860800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: j1p87jxk5 + job_id: jvgd7o06g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: 
Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.847740Z' + timestamp: '2024-06-08T22:30:34Z' - torchscript_onnx_tflite: - inference_time: 18458.0 - throughput: 54.17705060136526 + inference_time: 18529.0 + throughput: 53.96945328943818 estimated_peak_memory_range: - min: 2494464 - max: 110603872 + min: 2461696 + max: 110619440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jo5mz9mdp + job_id: jmg99w88g job_status: Passed torchscript_onnx_qnn: - inference_time: 14595.0 - throughput: 68.51661527920521 + inference_time: 14443.0 + throughput: 69.23769300006924 estimated_peak_memory_range: - min: 21004288 - max: 88103104 + min: 20983808 + max: 91014848 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jep2mowr5 + job_id: jz5wmqx4g job_status: Passed torchscript_onnx_ort: - inference_time: 22562.0 - throughput: 44.32231185178619 + inference_time: 23366.0 + throughput: 42.79722673970727 estimated_peak_memory_range: - min: 29396992 - max: 70535856 + min: 29618176 + max: 74645360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jogky64wp + job_id: jz57vx6n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.847824Z' + timestamp: '2024-06-08T22:30:35Z' - torchscript_onnx_tflite: - inference_time: 25423.0 - throughput: 39.33446092121307 + inference_time: 25775.0 + throughput: 38.797284190106694 estimated_peak_memory_range: - min: 2543616 - max: 4749104 + min: 2547712 + max: 5263000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 113 - job_id: jegne1nkg + job_id: jnp1qe37g job_status: Passed torchscript_onnx_qnn: - inference_time: 20236.0 - throughput: 49.4168808064835 + inference_time: 20126.0 + throughput: 49.686972075921695 estimated_peak_memory_range: - min: 25235456 - max: 45160432 + min: 25214976 + max: 40883168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: j2p0roj9p + job_id: jnp1qe3ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.847877Z' + timestamp: '2024-06-08T22:30:33Z' - torchscript_onnx_qnn: - inference_time: 25826.0 - throughput: 38.72066909316193 + inference_time: 25735.0 + throughput: 38.857586943850784 estimated_peak_memory_range: min: 25219072 max: 25219072 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 175 - job_id: jqpyd8x8p + job_id: jmg99w8mg job_status: Passed torchscript_onnx_ort: - inference_time: 29590.0 - throughput: 33.795201081446436 + inference_time: 29431.0 + throughput: 33.97777853283952 estimated_peak_memory_range: - min: 25219072 - max: 25219072 + min: 25223168 + max: 25223168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 177 - job_id: jn5q24yn5 + job_id: jqp4jv82p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: 
os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.847941Z' + timestamp: '2024-06-08T22:30:36Z' diff --git a/qai_hub_models/models/ffnet_54s_quantized/README.md b/qai_hub_models/models/ffnet_54s_quantized/README.md index 9f4d0a9c..5ab17ab3 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/README.md +++ b/qai_hub_models/models/ffnet_54s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-54S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_54s_quantized/export.py b/qai_hub_models/models/ffnet_54s_quantized/export.py index ec7c47fc..f16f2f1a 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/export.py +++ b/qai_hub_models/models/ffnet_54s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_54s_quantized/info.yaml b/qai_hub_models/models/ffnet_54s_quantized/info.yaml index a7f45fd7..60940868 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml index 1b654591..31883584 100644 --- a/qai_hub_models/models/ffnet_54s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_54s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-54S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 7120.0 - throughput: 140.4494382022472 + inference_time: 7119.0 + throughput: 140.4691670178396 estimated_peak_memory_range: - min: 2142208 - max: 10612096 + min: 688128 + max: 2335176 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jw561o76p + job_id: jo5mv3475 job_status: Passed torchscript_onnx_ort: - inference_time: 11873.0 - throughput: 84.22471153036301 + inference_time: 9678.0 + throughput: 103.32713370531101 estimated_peak_memory_range: - min: 30167040 - max: 64592624 + min: 7581696 + max: 40900680 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - 
total_layers: 115 - job_id: jvgdv6drg + total_layers: 113 + job_id: j1gle3o2p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.882272Z' + timestamp: '2024-06-08T22:31:28Z' - torchscript_onnx_tflite: - inference_time: 5175.0 - throughput: 193.23671497584542 + inference_time: 5120.0 + throughput: 195.3125 estimated_peak_memory_range: - min: 241664 - max: 74191168 + min: 45056 + max: 74881936 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1p3mo93g + job_id: jegnr3xj5 job_status: Passed torchscript_onnx_ort: - inference_time: 8976.0 - throughput: 111.40819964349376 + inference_time: 7395.0 + throughput: 135.2265043948614 estimated_peak_memory_range: - min: 18096128 - max: 52516464 + min: 5738496 + max: 42316048 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 115 - job_id: jz57doev5 + total_layers: 113 + job_id: jw56qnrng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.882322Z' + timestamp: '2024-06-08T22:31:29Z' - torchscript_onnx_tflite: inference_time: 7096.0 throughput: 140.92446448703495 estimated_peak_memory_range: - min: 655360 - max: 2310944 + min: 61440 + max: 14772576 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jwgovdrq5 + job_id: jopr1e9kg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.882348Z' + timestamp: '2024-06-08T22:31:20Z' - torchscript_onnx_tflite: - inference_time: 39841.0 - throughput: 25.09977159207851 + inference_time: 39816.0 + throughput: 25.11553144464537 estimated_peak_memory_range: - min: 12288 - max: 39329584 + min: 122880 + max: 41244048 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1pvw2lkg + job_id: jep23lj6g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.882373Z' + timestamp: '2024-06-08T22:31:21Z' - torchscript_onnx_tflite: - inference_time: 204729.0 - throughput: 4.884505858964777 + inference_time: 203928.0 + throughput: 4.903691498960417 estimated_peak_memory_range: - min: 237568 - max: 7136480 + min: 225280 + max: 7415104 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j7gjl3rvp + job_id: jqpyv6n0p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.882397Z' + timestamp: '2024-06-08T22:31:22Z' - torchscript_onnx_ort: - inference_time: 11333.0 - throughput: 88.23788934968675 + inference_time: 8994.0 + throughput: 111.185234600845 estimated_peak_memory_range: - min: 25227264 - max: 25227264 + min: 6340608 + max: 6340608 primary_compute_unit: NPU precision: int8 layer_info: - 
layers_on_npu: 115 + layers_on_npu: 113 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 115 - job_id: jqp4wey8g + total_layers: 113 + job_id: j1p3qexm5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.882423Z' + timestamp: '2024-06-08T22:31:30Z' diff --git a/qai_hub_models/models/ffnet_78s/README.md b/qai_hub_models/models/ffnet_78s/README.md index c3f6b6dc..c3d9f2d0 100644 --- a/qai_hub_models/models/ffnet_78s/README.md +++ b/qai_hub_models/models/ffnet_78s/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s/export.py b/qai_hub_models/models/ffnet_78s/export.py index 01600621..f0bec0bf 100644 --- a/qai_hub_models/models/ffnet_78s/export.py +++ b/qai_hub_models/models/ffnet_78s/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_78s/info.yaml b/qai_hub_models/models/ffnet_78s/info.yaml index 1f1d8cf9..233239ad 100644 --- a/qai_hub_models/models/ffnet_78s/info.yaml +++ b/qai_hub_models/models/ffnet_78s/info.yaml @@ -36,3 +36,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s/perf.yaml b/qai_hub_models/models/ffnet_78s/perf.yaml index 6c7da0ea..1d0087dd 100644 --- a/qai_hub_models/models/ffnet_78s/perf.yaml +++ b/qai_hub_models/models/ffnet_78s/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-78S performance_metrics: - torchscript_onnx_tflite: - inference_time: 29028.0 - throughput: 34.44949703734326 + inference_time: 29896.0 + throughput: 33.44929087503345 estimated_peak_memory_range: - min: 2166784 - max: 4957752 + min: 2584576 + max: 5177832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jo5mz90dp + job_id: j1pvzvezg job_status: Passed torchscript_onnx_qnn: - inference_time: 24289.0 - throughput: 41.170900407591915 + inference_time: 23500.0 + throughput: 42.5531914893617 estimated_peak_memory_range: - min: 25210880 - max: 56144640 + min: 25223168 + max: 55846352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jep2morr5 + job_id: jygzvr84p job_status: Passed torchscript_onnx_ort: - inference_time: 33942.0 - throughput: 29.46202345177067 + inference_time: 34791.0 + throughput: 28.743065735391337 estimated_peak_memory_range: - min: 
30212096 - max: 151965672 + min: 31657984 + max: 174636584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jogky62wp + job_id: jvgd7o86g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.915116Z' + timestamp: '2024-06-08T22:32:03Z' - torchscript_onnx_tflite: - inference_time: 21499.0 - throughput: 46.513791339132055 + inference_time: 21247.0 + throughput: 47.065468066079916 estimated_peak_memory_range: - min: 1843200 - max: 120056032 + min: 684032 + max: 120904016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jegne1zkg + job_id: j7gjkeo15 job_status: Passed torchscript_onnx_qnn: - inference_time: 17622.0 - throughput: 56.74724775848372 + inference_time: 17520.0 + throughput: 57.077625570776256 estimated_peak_memory_range: - min: 20983808 - max: 102064896 + min: 21012480 + max: 102988784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: jqpyd8o8p + job_id: jz5wmq84g job_status: Passed torchscript_onnx_ort: - inference_time: 26773.0 - throughput: 37.35106263773204 + inference_time: 25762.0 + throughput: 38.816862044872295 estimated_peak_memory_range: - min: 29380608 - max: 82049232 + min: 31490048 + max: 82980160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: jn5q24ln5 + job_id: jz57vxkn5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.915207Z' + timestamp: '2024-06-08T22:32:04Z' - torchscript_onnx_tflite: - inference_time: 29503.0 - throughput: 33.89485815001864 + inference_time: 29131.0 + throughput: 34.327692149256805 estimated_peak_memory_range: - min: 2560000 - max: 5083704 + min: 2592768 + max: 5433672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jopryxl0g + job_id: jlpe4k885 job_status: Passed torchscript_onnx_qnn: - inference_time: 23855.0 - throughput: 41.919932928107315 + inference_time: 23774.0 + throughput: 42.06275763439051 estimated_peak_memory_range: - min: 25219072 - max: 47387568 + min: 27922432 + max: 51160616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j1p87jek5 + job_id: jnp1qe7ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.915264Z' + timestamp: '2024-06-08T22:32:02Z' - torchscript_onnx_qnn: - inference_time: 32527.0 - throughput: 30.743689857656715 + inference_time: 32569.0 + throughput: 30.70404372255826 estimated_peak_memory_range: min: 25214976 max: 25214976 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 235 - job_id: j2p0rom9p + job_id: jmg99wkmg job_status: Passed torchscript_onnx_ort: - inference_time: 33218.0 - throughput: 30.104160394966584 + inference_time: 33100.0 + throughput: 30.211480362537763 
estimated_peak_memory_range: - min: 34959360 - max: 34959360 + min: 25219072 + max: 25219072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 237 - job_id: j1glkwyjp + job_id: jqp4jvm2p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.915328Z' + timestamp: '2024-06-08T22:32:05Z' diff --git a/qai_hub_models/models/ffnet_78s_lowres/README.md b/qai_hub_models/models/ffnet_78s_lowres/README.md index ac546964..306f938a 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/README.md +++ b/qai_hub_models/models/ffnet_78s_lowres/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-LowRes can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_lowres/export.py b/qai_hub_models/models/ffnet_78s_lowres/export.py index b0997e75..1ee8b996 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/export.py +++ b/qai_hub_models/models/ffnet_78s_lowres/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/ffnet_78s_lowres/info.yaml b/qai_hub_models/models/ffnet_78s_lowres/info.yaml index c50bd7cc..ffded2ec 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/info.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/info.yaml @@ -37,3 +37,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml index aaa178ed..9f48808e 100644 --- a/qai_hub_models/models/ffnet_78s_lowres/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_lowres/perf.yaml @@ -36,11 +36,11 @@ models: - name: FFNet-78S-LowRes performance_metrics: - torchscript_onnx_tflite: - inference_time: 10790.0 - throughput: 92.67840593141798 + inference_time: 10698.0 + throughput: 93.47541596560104 estimated_peak_memory_range: - min: 638976 - max: 2877104 + min: 12288 + max: 8183320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: j1p3moz3g + job_id: jo5mv3o75 job_status: Passed torchscript_onnx_qnn: - inference_time: 11359.0 - throughput: 88.03591865481117 + inference_time: 11228.0 + throughput: 89.06305664410402 estimated_peak_memory_range: - min: 2330624 - max: 55394032 + min: 2109440 + max: 55500544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 
- job_id: j7gjl33vp + job_id: jep23l46g job_status: Passed torchscript_onnx_ort: - inference_time: 8843.0 - throughput: 113.0837950921633 + inference_time: 8904.0 + throughput: 112.30907457322552 estimated_peak_memory_range: - min: 2228224 - max: 123699768 + min: 1257472 + max: 128438216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jmg94oow5 + job_id: jogkr39v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.949492Z' + timestamp: '2024-06-08T22:32:36Z' - torchscript_onnx_tflite: - inference_time: 7636.0 - throughput: 130.95861707700368 + inference_time: 7663.0 + throughput: 130.49719431032233 estimated_peak_memory_range: - min: 45056 - max: 52202592 + min: 159744 + max: 55453776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 149 - job_id: jwgovdlq5 + job_id: jegnr3oj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 7922.0 - throughput: 126.23074981065388 + inference_time: 7958.0 + throughput: 125.65971349585323 estimated_peak_memory_range: min: 6307840 - max: 73285696 + max: 77174624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jlpev66o5 + job_id: jqpyv6q0p job_status: Passed torchscript_onnx_ort: - inference_time: 6682.0 - throughput: 149.655791679138 + inference_time: 6766.0 + throughput: 147.79781259237365 estimated_peak_memory_range: - min: 6246400 - max: 48859696 + min: 6307840 + max: 49412144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jnp18oo8g + job_id: jn5q93mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.949582Z' - - torchscript_onnx_qnn: - inference_time: 11366.0 - throughput: 87.98169980644026 + timestamp: '2024-06-08T22:32:37Z' + - torchscript_onnx_tflite: + inference_time: 10676.0 + throughput: 93.66804046459347 + estimated_peak_memory_range: + min: 569344 + max: 2852616 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 149 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 149 + job_id: jopr1eokg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 11306.0 + throughput: 88.44861135680169 estimated_peak_memory_range: - min: 32768 - max: 52846864 + min: 16384 + max: 52829760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jz5w9yy3p + job_id: j1p8wz4qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -163,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.949622Z' + timestamp: '2024-06-08T22:32:35Z' - torchscript_onnx_qnn: - inference_time: 20343.0 - throughput: 49.1569581674286 + inference_time: 20526.0 + throughput: 48.718698236383126 estimated_peak_memory_range: min: 6303744 max: 6303744 @@ -177,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 236 - job_id: jygz7zzop + job_id: j2p0elv05 job_status: Passed torchscript_onnx_ort: - inference_time: 8732.0 - 
throughput: 114.52130096197892 + inference_time: 8769.0 + throughput: 114.03808872163303 estimated_peak_memory_range: - min: 40693760 - max: 40693760 + min: 30912512 + max: 30912512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 238 - job_id: jvgdv66rg + job_id: j1gle312p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -201,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.949692Z' + timestamp: '2024-06-08T22:32:38Z' diff --git a/qai_hub_models/models/ffnet_78s_quantized/README.md b/qai_hub_models/models/ffnet_78s_quantized/README.md index 43dcb2af..eaaccda1 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/README.md +++ b/qai_hub_models/models/ffnet_78s_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of FFNet-78S-Quantized can be found [here](https://github.com/Qualcomm-AI-research/FFNet/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Simple and Efficient Architectures for Semantic Segmentation](https://arxiv.org/abs/2206.08236) diff --git a/qai_hub_models/models/ffnet_78s_quantized/export.py b/qai_hub_models/models/ffnet_78s_quantized/export.py index 9b8ba13a..c09312bb 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/export.py +++ b/qai_hub_models/models/ffnet_78s_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -228,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/ffnet_78s_quantized/info.yaml b/qai_hub_models/models/ffnet_78s_quantized/info.yaml index 86f97e34..7e6703e9 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/info.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/info.yaml @@ -38,3 +38,4 @@ license_type: bsd-3-clause deploy_license_type: AI Model Hub License dataset: - cityscapes +labels_file: cityscapes_labels.txt diff --git a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml index b26e69f8..9f134ee8 100644 --- a/qai_hub_models/models/ffnet_78s_quantized/perf.yaml +++ b/qai_hub_models/models/ffnet_78s_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: FFNet-78S-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 8341.0 - throughput: 119.88970147464333 + inference_time: 8325.0 + throughput: 120.12012012012012 estimated_peak_memory_range: - min: 688128 - max: 2360856 + min: 663552 + max: 8732048 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,22 +54,22 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 154 - job_id: jqp4wee8g + job_id: j1p3qewm5 job_status: Passed torchscript_onnx_ort: - inference_time: 12055.0 - throughput: 82.9531314807134 + inference_time: 9764.0 + throughput: 102.41704219582138 estimated_peak_memory_range: - min: 30191616 - max: 78085232 + min: 7573504 + max: 52534152 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jn5q244n5 + total_layers: 149 + job_id: jvgd7oz6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:40.983404Z' + timestamp: '2024-06-08T22:33:40Z' - torchscript_onnx_tflite: - inference_time: 6017.0 - throughput: 166.19577862722286 + inference_time: 6002.0 + throughput: 166.61112962345885 estimated_peak_memory_range: - min: 16384 - max: 86811680 + min: 57344 + max: 86915504 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,22 +92,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: j0px1003g + job_id: jwgoe341p job_status: Passed torchscript_onnx_ort: - inference_time: 9813.0 - throughput: 101.9056353816366 + inference_time: 7233.0 + throughput: 138.25521913452232 estimated_peak_memory_range: - min: 28508160 - max: 75455072 + min: 8347648 + max: 53601040 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: j1glkwwjp + total_layers: 149 + job_id: jz57vx7n5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:40.983462Z' + timestamp: '2024-06-08T22:33:41Z' - torchscript_onnx_tflite: - inference_time: 8357.0 - throughput: 119.66016513102788 + inference_time: 8359.0 + throughput: 119.63153487259241 estimated_peak_memory_range: - min: 753664 - max: 2531424 + min: 679936 + max: 2337912 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,7 +130,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jo5mz99dp + job_id: j1pvzv9zg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -139,13 +139,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:40.983492Z' + timestamp: '2024-06-08T22:33:33Z' - torchscript_onnx_tflite: - inference_time: 44085.0 - throughput: 22.683452421458547 + inference_time: 44458.0 + throughput: 22.49313959242431 estimated_peak_memory_range: - min: 786432 - max: 43758368 + min: 729088 + max: 44729792 primary_compute_unit: NPU precision: int8 layer_info: @@ -153,7 +153,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jegne11kg + job_id: j7gjkew15 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -162,13 +162,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:40.983523Z' + timestamp: '2024-06-08T22:33:33Z' - torchscript_onnx_tflite: - inference_time: 216166.0 - throughput: 4.626074405780742 + inference_time: 219858.0 + throughput: 4.548390324664101 estimated_peak_memory_range: - min: 880640 - max: 7838848 + min: 393216 + max: 2901200 primary_compute_unit: NPU precision: int8 layer_info: @@ -176,7 +176,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 154 - job_id: jopryxx0g + job_id: 
jlpe4kl85 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -185,21 +185,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:40.983553Z' + timestamp: '2024-06-08T22:33:34Z' - torchscript_onnx_ort: - inference_time: 11523.0 - throughput: 86.78295582747549 + inference_time: 9426.0 + throughput: 106.08953957139826 estimated_peak_memory_range: - min: 34738176 - max: 34738176 + min: 5931008 + max: 5931008 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 151 + layers_on_npu: 149 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 151 - job_id: jw561oo6p + total_layers: 149 + job_id: jqp4jv92p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -208,4 +208,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:40.983583Z' + timestamp: '2024-06-08T22:33:42Z' diff --git a/qai_hub_models/models/googlenet/README.md b/qai_hub_models/models/googlenet/README.md index 71a8d343..214ae1f8 100644 --- a/qai_hub_models/models/googlenet/README.md +++ b/qai_hub_models/models/googlenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet/evaluate.py b/qai_hub_models/models/googlenet/evaluate.py new file mode 100644 index 00000000..70a3da92 --- /dev/null +++ b/qai_hub_models/models/googlenet/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.googlenet import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/googlenet/export.py b/qai_hub_models/models/googlenet/export.py index a369bd62..e611b7d9 100644 --- a/qai_hub_models/models/googlenet/export.py +++ b/qai_hub_models/models/googlenet/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet/info.yaml b/qai_hub_models/models/googlenet/info.yaml index e3143397..c7e0ca3d 100644 --- a/qai_hub_models/models/googlenet/info.yaml +++ b/qai_hub_models/models/googlenet/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/googlenet/perf.yaml b/qai_hub_models/models/googlenet/perf.yaml index 20bc3165..46726a11 100644 --- a/qai_hub_models/models/googlenet/perf.yaml +++ b/qai_hub_models/models/googlenet/perf.yaml @@ -36,11 +36,11 @@ models: - name: GoogLeNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1051.0 - throughput: 951.4747859181732 + inference_time: 1052.0 + throughput: 950.5703422053232 estimated_peak_memory_range: - min: 36864 - max: 17333992 + min: 73728 + max: 1671408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jopryxd0g + job_id: j7gjkeq15 job_status: Passed torchscript_onnx_qnn: - inference_time: 1087.0 - throughput: 919.9632014719411 + inference_time: 1088.0 + throughput: 919.1176470588235 estimated_peak_memory_range: - min: 0 - max: 4362392 + min: 16384 + max: 26332424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j2p0ro99p + job_id: jz5wmq44g job_status: Passed torchscript_onnx_ort: - inference_time: 1256.0 - throughput: 796.1783439490446 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: - min: 16384 - max: 56215984 + min: 81920 + max: 33177416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: j1glkw8jp + job_id: jz57vx9n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.061712Z' + timestamp: '2024-06-08T22:34:47Z' - torchscript_onnx_tflite: - inference_time: 674.0 - throughput: 1483.679525222552 + inference_time: 686.0 + throughput: 1457.725947521866 estimated_peak_memory_range: - min: 12288 - max: 46434032 + min: 16384 + max: 47804608 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jep2modr5 + job_id: jlpe4ky85 job_status: Passed torchscript_onnx_qnn: - inference_time: 695.0 - throughput: 1438.8489208633093 + inference_time: 700.0 + throughput: 1428.5714285714287 estimated_peak_memory_range: - min: 618496 - max: 58056704 + min: 0 + max: 53870528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: j1p87jrk5 + job_id: jmg99wdmg job_status: Passed torchscript_onnx_ort: - inference_time: 834.0 - throughput: 1199.0407673860911 + inference_time: 828.0 + throughput: 1207.729468599034 estimated_peak_memory_range: - min: 0 - max: 29165392 + min: 618496 + max: 31247424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - job_id: jw561om6p + job_id: jqp4jv32p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.061783Z' + timestamp: '2024-06-08T22:34:48Z' - torchscript_onnx_tflite: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1048.0 + throughput: 954.1984732824427 estimated_peak_memory_range: - min: 20480 - max: 2243656 + min: 40960 + max: 17749600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jqpyd828p + job_id: jygzvrn4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1099.0 - throughput: 909.9181073703367 + inference_time: 1098.0 + throughput: 910.7468123861566 estimated_peak_memory_range: - min: 12288 - max: 26385480 + min: 491520 + max: 26782184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jn5q241n5 + job_id: jvgd7o26g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.061828Z' + timestamp: '2024-06-08T22:34:46Z' - torchscript_onnx_qnn: - inference_time: 1276.0 - throughput: 783.6990595611285 + inference_time: 1266.0 + throughput: 789.8894154818325 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 143 - job_id: jogky60wp + job_id: jnp1qe6ng job_status: Passed torchscript_onnx_ort: - inference_time: 1316.0 - throughput: 759.8784194528876 + inference_time: 1388.0 + throughput: 720.4610951008646 estimated_peak_memory_range: - min: 15437824 - max: 15437824 + min: 671744 + max: 671744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 145 - 
job_id: j1p3mo73g + job_id: j0pxeyx85 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.061878Z' + timestamp: '2024-06-08T22:34:49Z' diff --git a/qai_hub_models/models/googlenet_quantized/README.md b/qai_hub_models/models/googlenet_quantized/README.md index cd504476..91e33b0b 100644 --- a/qai_hub_models/models/googlenet_quantized/README.md +++ b/qai_hub_models/models/googlenet_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of GoogLeNetQuantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) diff --git a/qai_hub_models/models/googlenet_quantized/evaluate.py b/qai_hub_models/models/googlenet_quantized/evaluate.py new file mode 100644 index 00000000..0e8be6d5 --- /dev/null +++ b/qai_hub_models/models/googlenet_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.googlenet_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/googlenet_quantized/export.py b/qai_hub_models/models/googlenet_quantized/export.py index 616b2243..c9504b86 100644 --- a/qai_hub_models/models/googlenet_quantized/export.py +++ b/qai_hub_models/models/googlenet_quantized/export.py @@ -196,7 +196,7 @@ def 
export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/googlenet_quantized/info.yaml b/qai_hub_models/models/googlenet_quantized/info.yaml index 0d5b5538..866df2a0 100644 --- a/qai_hub_models/models/googlenet_quantized/info.yaml +++ b/qai_hub_models/models/googlenet_quantized/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/googlenet_quantized/model.py b/qai_hub_models/models/googlenet_quantized/model.py index 8c9b76d6..e41ef8c4 100644 --- a/qai_hub_models/models/googlenet_quantized/model.py +++ b/qai_hub_models/models/googlenet_quantized/model.py @@ -22,7 +22,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -72,7 +72,7 @@ def from_pretrained( config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -84,36 +84,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation. 
- """ - blocks = [ - sim.model.net.inception3a, - sim.model.net.inception3b, - sim.model.net.inception4a, - sim.model.net.inception4b, - sim.model.net.inception4c, - sim.model.net.inception4d, - sim.model.net.inception4e, - sim.model.net.inception5a, - sim.model.net.inception5b, - ] - - idx = 3 - groups = [] - for block in blocks: - groups.append( - [ - getattr(block.branch1, f"module_relu_{idx}"), - getattr(getattr(block.branch2, "1"), f"module_relu_{idx+2}"), - getattr(getattr(block.branch3, "1"), f"module_relu_{idx+4}"), - getattr(getattr(block.branch4, "1"), f"module_relu_{idx+5}"), - ] - ) - idx += 6 - - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/googlenet_quantized/perf.yaml b/qai_hub_models/models/googlenet_quantized/perf.yaml index 9d5cd175..d51b481d 100644 --- a/qai_hub_models/models/googlenet_quantized/perf.yaml +++ b/qai_hub_models/models/googlenet_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: GoogLeNetQuantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 296.0 - throughput: 3378.3783783783783 + inference_time: 298.0 + throughput: 3355.7046979865772 estimated_peak_memory_range: - min: 12288 - max: 1422272 + min: 20480 + max: 1284320 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j1pvw2nkg + job_id: jegnr3kj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 337.0 - throughput: 2967.359050445104 + inference_time: 342.0 + throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 12288 - max: 4317312 + min: 16384 + max: 10406440 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jmg94oqw5 + job_id: j1p8wzxqp job_status: Passed torchscript_onnx_ort: - inference_time: 607.0 - throughput: 1647.4464579901153 + inference_time: 523.0 + throughput: 1912.0458891013384 estimated_peak_memory_range: min: 12288 - max: 21096120 + max: 12422920 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jnp18om7g + total_layers: 91 + job_id: jw56qn7ng job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.096189Z' + timestamp: '2024-06-08T22:35:33Z' - torchscript_onnx_tflite: - inference_time: 215.0 - throughput: 4651.162790697675 + inference_time: 237.0 + throughput: 4219.4092827004215 estimated_peak_memory_range: min: 12288 - max: 33407968 + max: 34025648 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: j7gjl38vp + job_id: jopr1ewkg job_status: Passed torchscript_onnx_qnn: - inference_time: 243.0 - throughput: 4115.22633744856 + inference_time: 244.0 + throughput: 4098.360655737705 estimated_peak_memory_range: - min: 159744 - max: 43857088 + min: 0 + max: 42694240 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jnp18om8g + job_id: jogkr34v5 job_status: Passed torchscript_onnx_ort: - inference_time: 454.0 - throughput: 2202.643171806167 + inference_time: 393.0 + throughput: 2544.529262086514 estimated_peak_memory_range: - min: 581632 - max: 28507056 + min: 12288 + max: 30491248 primary_compute_unit: NPU 
precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jvgdv6mzg + total_layers: 91 + job_id: j1p3qe9m5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.096253Z' + timestamp: '2024-06-08T22:35:34Z' - torchscript_onnx_tflite: inference_time: 298.0 throughput: 3355.7046979865772 estimated_peak_memory_range: - min: 12288 - max: 9463360 + min: 20480 + max: 1812976 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jlpev6no5 + job_id: jep23le6g job_status: Passed torchscript_onnx_qnn: - inference_time: 339.0 - throughput: 2949.8525073746314 + inference_time: 335.0 + throughput: 2985.0746268656717 estimated_peak_memory_range: - min: 28672 - max: 10150424 + min: 167936 + max: 10553224 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jz5w9yrmp + job_id: j1gle3x2p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.096292Z' + timestamp: '2024-06-08T22:35:32Z' - torchscript_onnx_tflite: - inference_time: 977.0 - throughput: 1023.5414534288639 + inference_time: 964.0 + throughput: 1037.344398340249 estimated_peak_memory_range: min: 12288 - max: 17947280 + max: 18322160 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jygz7z0op - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1026.0 - throughput: 974.6588693957115 - estimated_peak_memory_range: - min: 163840 - max: 38102800 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 86 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 86 - job_id: jmg94oq85 + job_id: jqpyv6m0p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.096330Z' + timestamp: '2024-06-08T22:35:26Z' - torchscript_onnx_tflite: - inference_time: 5627.0 - throughput: 177.7145903678692 + inference_time: 5711.0 + throughput: 175.1006828926633 estimated_peak_memory_range: - min: 12288 - max: 7365560 + min: 16384 + max: 2182760 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 84 - job_id: jz5w9yr3p + job_id: j2p0elj05 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.096352Z' + timestamp: '2024-06-08T22:35:27Z' - torchscript_onnx_qnn: - inference_time: 461.0 - throughput: 2169.1973969631235 + inference_time: 438.0 + throughput: 2283.10502283105 estimated_peak_memory_range: - min: 516096 - max: 516096 + min: 536576 + max: 536576 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 86 - job_id: jvgdv6mrg + job_id: jn5q93yep job_status: Passed torchscript_onnx_ort: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 526.0 + throughput: 1901.1406844106464 estimated_peak_memory_range: - 
min: 18911232 - max: 18911232 + min: 11812864 + max: 11812864 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 94 + layers_on_npu: 91 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 94 - job_id: jz57do895 + total_layers: 91 + job_id: jwgoe3r1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.096393Z' + timestamp: '2024-06-08T22:35:35Z' diff --git a/qai_hub_models/models/hrnet_pose/README.md b/qai_hub_models/models/hrnet_pose/README.md index 1291e266..d858ca38 100644 --- a/qai_hub_models/models/hrnet_pose/README.md +++ b/qai_hub_models/models/hrnet_pose/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HRNetPose can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) diff --git a/qai_hub_models/models/hrnet_pose/export.py b/qai_hub_models/models/hrnet_pose/export.py index c4f04ec8..6853ea8f 100644 --- a/qai_hub_models/models/hrnet_pose/export.py +++ b/qai_hub_models/models/hrnet_pose/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/hrnet_pose/perf.yaml b/qai_hub_models/models/hrnet_pose/perf.yaml index b68df421..26ccc19f 100644 --- a/qai_hub_models/models/hrnet_pose/perf.yaml +++ b/qai_hub_models/models/hrnet_pose/perf.yaml @@ -39,8 +39,8 @@ models: inference_time: 2822.0 throughput: 354.3586109142452 estimated_peak_memory_range: - min: 20480 - max: 2735056 + min: 28672 + max: 2472016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: j0px10zlg + job_id: j7gjke715 job_status: Passed torchscript_onnx_qnn: - inference_time: 2884.0 - throughput: 346.74063800277395 + inference_time: 2908.0 + throughput: 343.878954607978 estimated_peak_memory_range: min: 16384 - max: 21559424 + max: 21168936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jopryx77g + job_id: jz5wmq74g job_status: Passed torchscript_onnx_ort: - inference_time: 3038.0 - throughput: 329.1639236339697 + inference_time: 3074.0 + throughput: 325.30904359141186 estimated_peak_memory_range: - min: 81920 - max: 141558952 + min: 12288 + max: 131380776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: j1p87jko5 + job_id: jz5wmq7zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - 
timestamp: '2024-05-29T18:59:41.139934Z' + timestamp: '2024-06-08T22:36:14Z' - torchscript_onnx_tflite: - inference_time: 2078.0 - throughput: 481.23195380173246 + inference_time: 2066.0 + throughput: 484.027105517909 estimated_peak_memory_range: min: 16384 - max: 108912400 + max: 109820208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jo5mz9l9p + job_id: jlpe4kz85 job_status: Passed torchscript_onnx_qnn: - inference_time: 2144.0 - throughput: 466.4179104477612 + inference_time: 2134.0 + throughput: 468.6035613870665 estimated_peak_memory_range: min: 606208 - max: 191567376 + max: 190071840 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jep2mozq5 + job_id: jmg99wmmg job_status: Passed torchscript_onnx_ort: - inference_time: 2209.0 - throughput: 452.6935264825713 + inference_time: 2205.0 + throughput: 453.51473922902494 estimated_peak_memory_range: min: 12288 - max: 94173408 + max: 92302688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jogky6knp + job_id: jmg99wmqg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.140178Z' + timestamp: '2024-06-08T22:36:15Z' - torchscript_onnx_tflite: - inference_time: 2876.0 - throughput: 347.70514603616135 + inference_time: 2832.0 + throughput: 353.1073446327684 estimated_peak_memory_range: - min: 24576 - max: 3247784 + min: 28672 + max: 3094624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 516 - job_id: jegne1wqg + job_id: jygzvrm4p job_status: Passed torchscript_onnx_qnn: - inference_time: 2911.0 - throughput: 343.52456200618343 + inference_time: 2903.0 + throughput: 344.47123665173956 estimated_peak_memory_range: - min: 16384 - max: 20698712 + min: 12288 + max: 20792584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: j2p0roxnp + job_id: jvgd7o36g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.140315Z' + timestamp: '2024-06-08T22:36:13Z' - torchscript_onnx_qnn: - inference_time: 3152.0 - throughput: 317.25888324873097 + inference_time: 3132.0 + throughput: 319.28480204342276 estimated_peak_memory_range: - min: 589824 - max: 589824 + min: 897024 + max: 897024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 747 - job_id: jqpyd8ylp + job_id: jnp1qejng job_status: Passed torchscript_onnx_ort: - inference_time: 2994.0 - throughput: 334.001336005344 + inference_time: 2963.0 + throughput: 337.4957813027337 estimated_peak_memory_range: - min: 77676544 - max: 77676544 + min: 49115136 + max: 49115136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 749 - job_id: jn5q24do5 + job_id: jnp1qejkg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X 
Elite - timestamp: '2024-05-29T18:59:41.140494Z' + timestamp: '2024-06-08T22:36:16Z' diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md index fc585abc..570e2312 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/README.md +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of HuggingFace-WavLM-Base-Plus can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py index 5a4c60da..ff097a89 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/export.py +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/export.py @@ -171,7 +171,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -197,7 +197,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml index 48153308..cf928d5f 100644 --- a/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml +++ b/qai_hub_models/models/huggingface_wavlm_base_plus/perf.yaml @@ -36,11 +36,11 @@ models: - name: HuggingFace-WavLM-Base-Plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 950768.0 - throughput: 1.0517812968042677 + inference_time: 920916.0 + throughput: 1.085875367568812 estimated_peak_memory_range: - min: 140214272 - max: 142718344 + min: 147881984 + max: 155477640 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jegne1dqg + job_id: jw56qn8vg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.210607Z' + timestamp: '2024-06-08T22:39:46Z' - torchscript_onnx_tflite: - inference_time: 804134.0 - throughput: 1.243573832221993 + inference_time: 819047.0 + throughput: 1.220931155354943 estimated_peak_memory_range: - min: 149458944 - max: 186089136 + min: 148029440 + max: 185119104 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 811 total_layers: 811 - job_id: jopryxm7g + job_id: j1p3qezx5 job_status: Passed 
reference_device_info: name: Samsung Galaxy S24 @@ -80,7 +80,7 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.210708Z' + timestamp: '2024-06-08T22:39:47Z' - torchscript_onnx_tflite: inference_time: 932003.0 throughput: 1.0729579196633487 @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.210802Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.210809Z' + timestamp: '2024-05-23T16:02:38Z' diff --git a/qai_hub_models/models/inception_v3/README.md b/qai_hub_models/models/inception_v3/README.md index 0b085c5e..65bf345a 100644 --- a/qai_hub_models/models/inception_v3/README.md +++ b/qai_hub_models/models/inception_v3/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Inception-v3 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3/evaluate.py b/qai_hub_models/models/inception_v3/evaluate.py new file mode 100644 index 00000000..4bf4f8d2 --- /dev/null +++ b/qai_hub_models/models/inception_v3/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.inception_v3 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/inception_v3/export.py b/qai_hub_models/models/inception_v3/export.py index 328357a4..868a0239 100644 --- a/qai_hub_models/models/inception_v3/export.py +++ b/qai_hub_models/models/inception_v3/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3/info.yaml b/qai_hub_models/models/inception_v3/info.yaml index 69856a43..66ecf428 100644 --- a/qai_hub_models/models/inception_v3/info.yaml +++ b/qai_hub_models/models/inception_v3/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/inception_v3/perf.yaml b/qai_hub_models/models/inception_v3/perf.yaml index 93777204..bf2b161e 100644 --- a/qai_hub_models/models/inception_v3/perf.yaml +++ b/qai_hub_models/models/inception_v3/perf.yaml @@ -36,11 +36,11 @@ models: - name: Inception-v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1356.0 - throughput: 737.4631268436578 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: min: 24576 - max: 2049096 + max: 2203288 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1p3modng + job_id: jvgd7odkg job_status: Passed torchscript_onnx_qnn: - inference_time: 1412.0 - throughput: 708.2152974504249 + inference_time: 1424.0 + throughput: 702.2471910112359 estimated_peak_memory_range: - min: 12288 - max: 149845872 + min: 16384 + max: 150398664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: j7gjl39ep + job_id: j0pxeylj5 job_status: Passed torchscript_onnx_ort: - inference_time: 1729.0 - throughput: 578.368999421631 + inference_time: 1714.0 + throughput: 583.4305717619603 estimated_peak_memory_range: - min: 12288 - max: 205289944 + min: 24576 + max: 216921632 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jmg94or85 + job_id: jep23loxg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.228489Z' + timestamp: '2024-06-08T22:40:24Z' - torchscript_onnx_tflite: - inference_time: 1028.0 - throughput: 972.7626459143969 + inference_time: 1026.0 + throughput: 974.6588693957115 estimated_peak_memory_range: min: 12288 - max: 52149168 + max: 54111920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: jwgovdxk5 + job_id: jz57vxeq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1045.0 - throughput: 956.9377990430622 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: - min: 618496 - max: 62520720 + min: 0 + max: 64200016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jlpev6qv5 + job_id: jo5mv30y5 job_status: Passed torchscript_onnx_ort: - inference_time: 1335.0 - throughput: 749.0636704119851 + inference_time: 1328.0 + throughput: 753.0120481927711 estimated_peak_memory_range: - min: 618496 - max: 34471808 + min: 0 + max: 33764336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jnp18o97g + job_id: jqpyv68rp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.228583Z' + timestamp: '2024-06-08T22:40:25Z' - torchscript_onnx_tflite: - inference_time: 1349.0 - throughput: 741.2898443291327 + inference_time: 1355.0 + throughput: 738.0073800738007 estimated_peak_memory_range: - min: 24576 - max: 2179216 + min: 16384 + max: 2130328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 129 - job_id: j1pvw28rg + job_id: jqp4jvyqp job_status: Passed torchscript_onnx_qnn: - inference_time: 1404.0 - throughput: 712.2507122507122 + inference_time: 1411.0 + throughput: 708.7172218284904 estimated_peak_memory_range: - min: 69632 - max: 149743848 + min: 0 + max: 150030456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jz5w9ykmp + job_id: jopr1exvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.228643Z' + timestamp: '2024-06-08T22:40:23Z' - torchscript_onnx_qnn: - inference_time: 1519.0 - throughput: 658.3278472679394 + inference_time: 1503.0 + throughput: 665.335994677312 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1097728 + max: 1097728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 219 - job_id: jygz7z6xp + job_id: jegnr31v5 job_status: Passed torchscript_onnx_ort: - inference_time: 1677.0 - throughput: 596.3029218843172 + inference_time: 1640.0 + throughput: 609.7560975609756 estimated_peak_memory_range: - min: 47853568 - max: 47853568 + min: 39940096 + max: 39940096 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 221 - job_id: jvgdv6kzg + job_id: j2p0elo25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.228713Z' + timestamp: '2024-06-08T22:40:26Z' diff --git a/qai_hub_models/models/inception_v3_quantized/README.md b/qai_hub_models/models/inception_v3_quantized/README.md index c9f4b556..b326f00a 100644 --- a/qai_hub_models/models/inception_v3_quantized/README.md +++ b/qai_hub_models/models/inception_v3_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Inception-v3-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) diff --git a/qai_hub_models/models/inception_v3_quantized/evaluate.py b/qai_hub_models/models/inception_v3_quantized/evaluate.py index a4f88114..47341fcd 100644 --- a/qai_hub_models/models/inception_v3_quantized/evaluate.py +++ b/qai_hub_models/models/inception_v3_quantized/evaluate.py @@ -2,17 +2,21 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + from __future__ import annotations import warnings import qai_hub as hub -from qai_hub_models.models.inception_v3 import Model as FP16Model from qai_hub_models.models.inception_v3_quantized import MODEL_ID, Model from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.evaluate import evaluate_on_dataset from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin SUPPORTED_DATASETS = ["imagenette", "imagenet"] @@ -20,9 +24,12 @@ def main(): warnings.filterwarnings("ignore") parser = evaluate_parser( - model_cls=Model, default_split_size=2500, supported_datasets=SUPPORTED_DATASETS + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, ) args = parser.parse_args() + args.device = None if args.hub_model_id is not None: hub_model = hub.get_model(args.hub_model_id) @@ -30,8 +37,14 @@ def main(): hub_model = compile_model_from_args( MODEL_ID, args, get_model_kwargs(Model, vars(args)) ) - hub_device = get_hub_device(args.device, args.chipset) - torch_model = FP16Model.from_pretrained(**get_model_kwargs(FP16Model, vars(args))) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) evaluate_on_dataset( hub_model, torch_model, diff --git a/qai_hub_models/models/inception_v3_quantized/export.py b/qai_hub_models/models/inception_v3_quantized/export.py index cfd584a1..e6cd4f44 100644 --- a/qai_hub_models/models/inception_v3_quantized/export.py +++ b/qai_hub_models/models/inception_v3_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/inception_v3_quantized/info.yaml b/qai_hub_models/models/inception_v3_quantized/info.yaml index c3d40275..4f3446a8 100644 --- a/qai_hub_models/models/inception_v3_quantized/info.yaml +++ b/qai_hub_models/models/inception_v3_quantized/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/inception_v3_quantized/model.py b/qai_hub_models/models/inception_v3_quantized/model.py index a53c2f4f..2a74e221 100644 --- a/qai_hub_models/models/inception_v3_quantized/model.py +++ b/qai_hub_models/models/inception_v3_quantized/model.py @@ -22,7 +22,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -75,7 +75,7 @@ def from_pretrained( config_file=get_default_aimet_config(), dummy_input=torch.rand(input_shape), ) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -87,111 +87,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def 
_tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation, and even - avoids fatal TFLite converter errors. - """ - - n = sim.model.net - groups = [ - [ - n.maxpool2, - n.Mixed_5b.module_avg_pool2d, - ], - [ - n.Mixed_5b.branch1x1.module_relu_5, - n.Mixed_5b.branch5x5_2.module_relu_7, - n.Mixed_5b.branch3x3dbl_3.module_relu_10, - n.Mixed_5b.branch_pool.module_relu_11, - n.Mixed_5b.module_cat, - n.Mixed_5c.module_avg_pool2d_1, - ], - [ - n.Mixed_5c.branch1x1.module_relu_12, - n.Mixed_5c.branch5x5_2.module_relu_14, - n.Mixed_5c.branch3x3dbl_3.module_relu_17, - n.Mixed_5c.branch_pool.module_relu_18, - n.Mixed_5c.module_cat_1, - n.Mixed_5d.module_avg_pool2d_2, - ], - [ - n.Mixed_5d.branch1x1.module_relu_19, - n.Mixed_5d.branch5x5_2.module_relu_21, - n.Mixed_5d.branch3x3dbl_3.module_relu_24, - n.Mixed_5d.branch_pool.module_relu_25, - n.Mixed_5d.module_cat_2, - # This group has a branch with only a max pool, - # this requires the two concat groups to merge - n.Mixed_6a.branch3x3.module_relu_26, - n.Mixed_6a.branch3x3dbl_3.module_relu_29, - n.Mixed_6a.module_max_pool2d, - n.Mixed_6a.module_cat_3, - n.Mixed_6b.module_avg_pool2d_3, - ], - [ - n.Mixed_6b.branch1x1.module_relu_30, - n.Mixed_6b.branch7x7_3.module_relu_33, - n.Mixed_6b.branch7x7dbl_5.module_relu_38, - n.Mixed_6b.branch_pool.module_relu_39, - n.Mixed_6b.module_cat_4, - n.Mixed_6c.module_avg_pool2d_4, - ], - [ - n.Mixed_6c.branch1x1.module_relu_40, - n.Mixed_6c.branch7x7_3.module_relu_43, - n.Mixed_6c.branch7x7dbl_5.module_relu_48, - n.Mixed_6c.branch_pool.module_relu_49, - n.Mixed_6c.module_cat_5, - n.Mixed_6d.module_avg_pool2d_5, - ], - [ - n.Mixed_6d.branch1x1.module_relu_50, - n.Mixed_6d.branch7x7_3.module_relu_53, - n.Mixed_6d.branch7x7dbl_5.module_relu_58, - n.Mixed_6d.branch_pool.module_relu_59, - n.Mixed_6d.module_cat_6, - n.Mixed_6e.module_avg_pool2d_6, - ], - [ - n.Mixed_6e.branch1x1.module_relu_60, - n.Mixed_6e.branch7x7_3.module_relu_63, - n.Mixed_6e.branch7x7dbl_5.module_relu_68, - n.Mixed_6e.branch_pool.module_relu_69, - n.Mixed_6e.module_cat_7, - # This group has a branch with only a max pool, - # this requires the two concat groups to merge - n.Mixed_7a.branch3x3_2.module_relu_71, - n.Mixed_7a.branch7x7x3_4.module_relu_75, - n.Mixed_7a.module_max_pool2d_1, - n.Mixed_7a.module_cat_8, - n.Mixed_7b.module_avg_pool2d_7, - ], - [ - n.Mixed_7b.branch1x1.module_relu_76, - n.Mixed_7b.branch3x3_2a.module_relu_78, - n.Mixed_7b.branch3x3_2b.module_relu_79, - n.Mixed_7b.branch3x3dbl_3a.module_relu_82, - n.Mixed_7b.branch3x3dbl_3b.module_relu_83, - n.Mixed_7b.branch_pool.module_relu_84, - n.Mixed_7b.module_cat_9, - n.Mixed_7b.module_cat_10, - n.Mixed_7b.module_cat_11, - n.Mixed_7c.module_avg_pool2d_8, - ], - [ - n.Mixed_7c.branch1x1.module_relu_85, - n.Mixed_7c.branch3x3_2a.module_relu_87, - n.Mixed_7c.branch3x3_2b.module_relu_88, - n.Mixed_7c.branch3x3dbl_3a.module_relu_91, - n.Mixed_7c.branch3x3dbl_3b.module_relu_92, - n.Mixed_7c.branch_pool.module_relu_93, - n.Mixed_7c.module_cat_12, - n.Mixed_7c.module_cat_13, - n.Mixed_7c.module_cat_14, - ], - ] - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/inception_v3_quantized/perf.yaml b/qai_hub_models/models/inception_v3_quantized/perf.yaml index 07c91205..a770e878 100644 --- a/qai_hub_models/models/inception_v3_quantized/perf.yaml +++ b/qai_hub_models/models/inception_v3_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - 
name: Inception-v3-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 617.0 - throughput: 1620.7455429497568 + inference_time: 615.0 + throughput: 1626.0162601626016 estimated_peak_memory_range: min: 12288 - max: 1887024 + max: 1478976 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jo5mz979p + job_id: jogkr36y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 645.0 - throughput: 1550.3875968992247 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 221184 - max: 60468504 + min: 12288 + max: 165286688 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j2p0rownp + job_id: jwgoe3d4p job_status: Passed torchscript_onnx_ort: - inference_time: 932.0 - throughput: 1072.961373390558 + inference_time: 844.0 + throughput: 1184.8341232227488 estimated_peak_memory_range: - min: 65536 - max: 32199936 + min: 12288 + max: 65222768 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: jw561okyp + total_layers: 134 + job_id: jygzvrzzp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.263421Z' + timestamp: '2024-06-08T22:41:32Z' - torchscript_onnx_tflite: - inference_time: 494.0 - throughput: 2024.2914979757086 + inference_time: 486.0 + throughput: 2057.61316872428 estimated_peak_memory_range: min: 12288 - max: 64806240 + max: 67571472 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegne14qg + job_id: jn5q9347p job_status: Passed torchscript_onnx_qnn: - inference_time: 502.0 - throughput: 1992.03187250996 + inference_time: 496.0 + throughput: 2016.1290322580646 estimated_peak_memory_range: min: 167936 - max: 54785280 + max: 54564464 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: j1p87jno5 + job_id: j1pvzv27g job_status: Passed torchscript_onnx_ort: - inference_time: 728.0 - throughput: 1373.6263736263736 + inference_time: 659.0 + throughput: 1517.4506828528072 estimated_peak_memory_range: - min: 618496 - max: 41725008 + min: 12288 + max: 43078608 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: j1p3moyng + total_layers: 134 + job_id: jz5wmqyzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.263500Z' + timestamp: '2024-06-08T22:41:33Z' - torchscript_onnx_tflite: - inference_time: 613.0 - throughput: 1631.3213703099511 + inference_time: 625.0 + throughput: 1600.0 estimated_peak_memory_range: - min: 16384 - max: 1745384 + min: 24576 + max: 1548872 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopryxr7g + job_id: j1gle3wep job_status: Passed torchscript_onnx_qnn: inference_time: 648.0 throughput: 1543.20987654321 estimated_peak_memory_range: - min: 0 - 
max: 6304608 + min: 36864 + max: 39620504 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jn5q24no5 + job_id: jlpe4k675 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.263550Z' + timestamp: '2024-06-08T22:41:31Z' - torchscript_onnx_tflite: - inference_time: 2343.0 - throughput: 426.8032437046522 + inference_time: 2357.0 + throughput: 424.26813746287655 estimated_peak_memory_range: - min: 16384 - max: 21927008 + min: 12288 + max: 22249744 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jep2mo1q5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 2406.0 - throughput: 415.6275976724855 - estimated_peak_memory_range: - min: 12288 - max: 51696096 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 134 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 134 - job_id: j1glkwjmp + job_id: jw56qnovg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.263601Z' + timestamp: '2024-06-08T22:41:26Z' - torchscript_onnx_tflite: - inference_time: 7547.0 - throughput: 132.50298131707964 + inference_time: 7805.0 + throughput: 128.12299807815504 estimated_peak_memory_range: - min: 167936 - max: 7482520 + min: 16384 + max: 2215816 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyd8llp + job_id: j1p3qeox5 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.263629Z' + timestamp: '2024-06-08T22:41:27Z' - torchscript_onnx_qnn: - inference_time: 710.0 - throughput: 1408.4507042253522 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 446464 - max: 446464 + min: 450560 + max: 450560 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 134 - job_id: jogky61np + job_id: j7gjke375 job_status: Passed torchscript_onnx_ort: - inference_time: 898.0 - throughput: 1113.5857461024498 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 41455616 - max: 41455616 + min: 12218368 + max: 12218368 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 137 + layers_on_npu: 134 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 137 - job_id: jwgovdjk5 + total_layers: 134 + job_id: jmg99woqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.263679Z' + timestamp: '2024-06-08T22:41:34Z' diff --git a/qai_hub_models/models/lama_dilated/README.md b/qai_hub_models/models/lama_dilated/README.md index 511bdc4a..a418710e 100644 --- a/qai_hub_models/models/lama_dilated/README.md +++ b/qai_hub_models/models/lama_dilated/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of LaMa-Dilated can be found [here](https://github.com/advimman/lama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Resolution-robust Large Mask Inpainting with Fourier Convolutions](https://arxiv.org/abs/2109.07161) diff --git a/qai_hub_models/models/lama_dilated/export.py b/qai_hub_models/models/lama_dilated/export.py index 87478f48..b4b49854 100644 --- a/qai_hub_models/models/lama_dilated/export.py +++ b/qai_hub_models/models/lama_dilated/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/lama_dilated/perf.yaml b/qai_hub_models/models/lama_dilated/perf.yaml index d59888e4..87e48fb1 100644 --- a/qai_hub_models/models/lama_dilated/perf.yaml +++ b/qai_hub_models/models/lama_dilated/perf.yaml @@ -36,26 +36,26 @@ models: - name: LaMa-Dilated performance_metrics: - torchscript_onnx_tflite: - inference_time: 87216.0 - throughput: 11.465786094294625 + inference_time: 86343.0 + throughput: 11.581714788691613 estimated_peak_memory_range: - min: 2220032 - max: 138408472 + min: 3289088 + max: 139370192 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jygz7z1xp + total_layers: 344 + job_id: jvgd7o6kg job_status: Passed torchscript_onnx_qnn: - inference_time: 81054.0 - throughput: 12.33745404298369 + inference_time: 81307.0 + throughput: 12.299064041226462 estimated_peak_memory_range: - min: 3207168 - max: 44584112 + min: 3371008 + max: 42726616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jvgdv6jzg + job_id: j0pxey0j5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,28 +72,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.307510Z' + timestamp: '2024-06-08T22:42:12Z' - torchscript_onnx_tflite: - inference_time: 59838.0 - throughput: 16.7117884956048 + inference_time: 59391.0 + throughput: 16.837567981680728 estimated_peak_memory_range: - min: 2932736 - max: 243876640 + min: 53248 + max: 241657616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jmg94o685 + total_layers: 344 + job_id: jz57vxoq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 56713.0 - throughput: 17.632641546030012 + inference_time: 57168.0 + throughput: 17.492303386509935 estimated_peak_memory_range: - min: 1187840 - max: 164342080 + min: 2736128 + max: 165991776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jz57doq95 + job_id: jo5mv39y5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,28 +110,28 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.307597Z' + timestamp: '2024-06-08T22:42:13Z' - torchscript_onnx_tflite: - inference_time: 87348.0 - throughput: 11.448459037413564 + inference_time: 85709.0 + throughput: 11.667386155479589 estimated_peak_memory_range: - min: 3272704 - max: 139025312 + min: 3477504 + max: 138753616 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 347 + layers_on_npu: 344 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 347 - job_id: jnp18or7g + total_layers: 344 + job_id: jqp4jveqp job_status: Passed torchscript_onnx_qnn: - inference_time: 81632.0 - throughput: 12.250098000784007 + inference_time: 81015.0 + throughput: 12.343393198790347 estimated_peak_memory_range: - min: 3194880 - max: 39290392 + min: 3174400 + max: 43648896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: j0px10wlg + job_id: jopr1edvg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.307681Z' + timestamp: '2024-06-08T22:42:16Z' - torchscript_onnx_qnn: - inference_time: 92179.0 - throughput: 10.848457891710693 + inference_time: 91919.0 + throughput: 10.879143593816295 estimated_peak_memory_range: min: 4202496 max: 4202496 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 333 - job_id: jqp4wez1g + job_id: jegnr3qv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,4 +171,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.307729Z' + timestamp: '2024-06-08T22:42:14Z' diff --git a/qai_hub_models/models/litehrnet/README.md b/qai_hub_models/models/litehrnet/README.md index 13fa47e0..d44fd6a6 100644 --- a/qai_hub_models/models/litehrnet/README.md +++ b/qai_hub_models/models/litehrnet/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of LiteHRNet can be found [here](https://github.com/HRNet/Lite-HRNet/blob/hrnet/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Lite-HRNet: A Lightweight High-Resolution Network](https://arxiv.org/abs/2104.06403) diff --git a/qai_hub_models/models/litehrnet/export.py b/qai_hub_models/models/litehrnet/export.py index 81e07f3a..9dba5402 100644 --- a/qai_hub_models/models/litehrnet/export.py +++ b/qai_hub_models/models/litehrnet/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -201,7 +201,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/litehrnet/perf.yaml b/qai_hub_models/models/litehrnet/perf.yaml index 87dcc012..64bcf8e7 100644 --- a/qai_hub_models/models/litehrnet/perf.yaml +++ b/qai_hub_models/models/litehrnet/perf.yaml @@ -36,11 +36,11 @@ models: - name: LiteHRNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 11098.0 - throughput: 90.10632546404757 + inference_time: 11261.0 + throughput: 88.80206020779683 estimated_peak_memory_range: - min: 6336512 - max: 9866728 + min: 6529024 + max: 13390128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jqpyd89lp + job_id: jogkr30y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.334245Z' + timestamp: '2024-06-08T22:42:54Z' - torchscript_onnx_tflite: - inference_time: 7918.0 - throughput: 126.2945188178833 + inference_time: 7629.0 + throughput: 131.07877834578582 estimated_peak_memory_range: - min: 16384 - max: 74961808 + min: 6545408 + max: 86932832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: j1p87jlo5 + job_id: jn5q9317p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.334395Z' + timestamp: '2024-06-08T22:42:55Z' - torchscript_onnx_tflite: - inference_time: 11183.0 - throughput: 89.42144326209424 + inference_time: 11181.0 + throughput: 89.43743851176103 estimated_peak_memory_range: - min: 6533120 - max: 9097208 + min: 6561792 + max: 18010528 primary_compute_unit: NPU precision: fp16 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 10 total_layers: 1236 - job_id: jogky6jnp + job_id: j1gle38ep job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.334538Z' - - reference_device_info: - name: 
Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.334546Z' + timestamp: '2024-06-08T22:42:56Z' diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md index 97d5d37b..54e3e3f8 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/README.md @@ -70,7 +70,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Llama-v2-7B-Chat can be found [here](https://github.com/facebookresearch/llama/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/facebookresearch/llama/blob/main/LICENSE) ## References * [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py index 72ea13aa..5dc3fda2 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/model.py @@ -256,7 +256,7 @@ def _get_past_key_names(start: int = 0, end: int = 8, suffix=""): def _get_output_names_from_split(split_part: int = 1): layer_start, layer_end = _get_hidden_layer_range_from_split(split_part=split_part) - output_list = [f"layers_{layer_end-1}_add_out_0"] + output_list = [f"layers_{layer_end - 1}_add_out_0"] output_list += _get_past_key_names(layer_start, layer_end, suffix="_out") return output_list @@ -414,7 +414,7 @@ def _get_llama_model_with_split( os.path.join( AIMET_ENCODINGS_PREFIX, model_encoding_tag, - f"llama_{model_encoding_tag}_sha_{split_part-1}.encodings", + f"llama_{model_encoding_tag}_sha_{split_part - 1}.encodings", ) ) aimet_encodings = str( diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml index 19c64c6c..b8c5ad10 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml @@ -24,6 +24,29 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed + - reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-05-23T00:34:02.549319Z' + torchscript_onnx_qnn: + inference_time: 118139 + throughput: 8.46 + estimated_peak_memory_range: + min: 68124672 + max: 68124672 + layer_info: + layers_on_npu: 34842 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 34842 + precision: uint16 + primary_compute_unit: NPU + job_id: "null" + job_status: Passed - name: Llama-PromptProcessor-Quantized performance_metrics: - reference_device_info: @@ -49,15 +72,40 @@ models: primary_compute_unit: NPU job_id: "null" job_status: Passed + - reference_device_info: + name: Snapdragon X Elite CRD + os: '11' + form_factor: Compute + os_name: Windows + manufacturer: Qualcomm + chipset: Snapdragon® X Elite + timestamp: '2024-05-23T00:34:02.549319Z' + torchscript_onnx_qnn: + inference_time: 2302575 + throughput: 445.21 + estimated_peak_memory_range: + min: 10788864 + max: 10788864 + layer_info: + layers_on_npu: 31766 + 
layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 31766 + precision: uint16 + primary_compute_unit: NPU + job_id: "null" + job_status: Passed aggregated: supported_devices: - Samsung Galaxy S23 Ultra - Samsung Galaxy S24 + - Snapdragon X Elite CRD supported_oses: - Android supported_chipsets: - Snapdragon® 8 Gen 2 - Snapdragon® 8 Gen 3 + - Snapdragon® X Elite performance_metrics: - reference_device_info: name: Samsung Galaxy S23 Ultra diff --git a/qai_hub_models/models/mediapipe_face/README.md b/qai_hub_models/models/mediapipe_face/README.md index 1a280d17..a565c33d 100644 --- a/qai_hub_models/models/mediapipe_face/README.md +++ b/qai_hub_models/models/mediapipe_face/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Face-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs](https://arxiv.org/abs/1907.05047) diff --git a/qai_hub_models/models/mediapipe_face/export.py b/qai_hub_models/models/mediapipe_face/export.py index 67ca40df..adb92a20 100644 --- a/qai_hub_models/models/mediapipe_face/export.py +++ b/qai_hub_models/models/mediapipe_face/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_face/perf.yaml b/qai_hub_models/models/mediapipe_face/perf.yaml index bf8dcabd..ed6dfc34 100644 --- a/qai_hub_models/models/mediapipe_face/perf.yaml +++ b/qai_hub_models/models/mediapipe_face/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipeFaceDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: - min: 12288 - max: 3969936 + min: 90112 + max: 2155184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: j1pvw23rg + job_id: j7gjkey75 job_status: Passed torchscript_onnx_qnn: - inference_time: 839.0 - throughput: 1191.8951132300358 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 20480 - max: 5908152 + min: 16384 + max: 101864120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jnp18o07g + job_id: jvgd7o4kg job_status: Passed torchscript_onnx_ort: - inference_time: 1003.0 - throughput: 997.0089730807578 + inference_time: 986.0 + throughput: 1014.1987829614604 estimated_peak_memory_range: - min: 2125824 - max: 11100896 + min: 552960 + max: 8114576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j0px10v8g + job_id: jqpyv6yrp job_status: Passed 
reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.352382Z' + timestamp: '2024-06-08T22:45:23Z' - torchscript_onnx_tflite: - inference_time: 575.0 - throughput: 1739.1304347826087 + inference_time: 543.0 + throughput: 1841.6206261510129 estimated_peak_memory_range: - min: 12288 - max: 29783776 + min: 16384 + max: 31618960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jlpev69v5 + job_id: jygzvryzp job_status: Passed torchscript_onnx_qnn: inference_time: 593.0 throughput: 1686.3406408094436 estimated_peak_memory_range: - min: 12288 - max: 48781760 + min: 802816 + max: 49388544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz5w9yo4p + job_id: jqp4jv4qp job_status: Passed torchscript_onnx_ort: - inference_time: 734.0 - throughput: 1362.3978201634877 + inference_time: 706.0 + throughput: 1416.4305949008499 estimated_peak_memory_range: - min: 12288 - max: 23671680 + min: 548864 + max: 22898592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jegne12jg + job_id: j1p8wzkzp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.352462Z' + timestamp: '2024-06-08T22:45:25Z' - torchscript_onnx_tflite: - inference_time: 775.0 - throughput: 1290.3225806451612 + inference_time: 779.0 + throughput: 1283.6970474967907 estimated_peak_memory_range: - min: 32768 - max: 1633176 + min: 12288 + max: 1532120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 112 - job_id: jz5w9yomp + job_id: jmg99w2qg job_status: Passed torchscript_onnx_qnn: - inference_time: 842.0 - throughput: 1187.648456057007 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 819200 - max: 7053256 + min: 806912 + max: 77885504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz57dozn5 + job_id: jopr1e7vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.352513Z' + timestamp: '2024-06-08T22:45:21Z' - torchscript_onnx_qnn: - inference_time: 931.0 - throughput: 1074.1138560687432 + inference_time: 928.0 + throughput: 1077.5862068965516 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jnp18o0ng + job_id: jo5mv3ky5 job_status: Passed torchscript_onnx_ort: - inference_time: 999.0 - throughput: 1001.001001001001 + inference_time: 997.0 + throughput: 1003.0090270812437 estimated_peak_memory_range: - min: 3235840 - max: 3235840 + min: 5971968 + max: 5971968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jep2mo865 + job_id: jn5q93d7p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows 
manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.352570Z' + timestamp: '2024-06-08T22:45:27Z' - name: MediaPipeFaceLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 320.0 - throughput: 3125.0 + inference_time: 318.0 + throughput: 3144.6540880503144 estimated_peak_memory_range: - min: 20480 - max: 1923360 + min: 12288 + max: 2130328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: j7gjl3xep + job_id: jlpe4kx75 job_status: Passed torchscript_onnx_qnn: - inference_time: 384.0 - throughput: 2604.1666666666665 + inference_time: 391.0 + throughput: 2557.544757033248 estimated_peak_memory_range: - min: 462848 - max: 3777488 + min: 131072 + max: 98992544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jvgdv6wzg + job_id: jz57vxnq5 job_status: Passed torchscript_onnx_ort: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 532.0 + throughput: 1879.6992481203008 estimated_peak_memory_range: - min: 16384 - max: 40070192 + min: 12288 + max: 84060104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jo5mz9r7p + job_id: j2p0elx25 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.352636Z' + timestamp: '2024-06-08T22:45:24Z' - torchscript_onnx_tflite: - inference_time: 247.0 - throughput: 4048.582995951417 + inference_time: 224.0 + throughput: 4464.285714285715 estimated_peak_memory_range: - min: 12288 - max: 26073488 + min: 16384 + max: 27155600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jygz7zexp + job_id: jz5wmqzzg job_status: Passed torchscript_onnx_qnn: - inference_time: 281.0 - throughput: 3558.7188612099644 + inference_time: 283.0 + throughput: 3533.5689045936397 estimated_peak_memory_range: - min: 12288 - max: 37232720 + min: 458752 + max: 40876896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jmg94ovm5 + job_id: j0pxeyrj5 job_status: Passed torchscript_onnx_ort: - inference_time: 353.0 - throughput: 2832.8611898016998 + inference_time: 375.0 + throughput: 2666.6666666666665 estimated_peak_memory_range: min: 12288 - max: 22272016 + max: 19616240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jopryxkkg + job_id: jogkr3ky5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.352701Z' + timestamp: '2024-06-08T22:45:26Z' - torchscript_onnx_tflite: - inference_time: 311.0 - throughput: 3215.434083601286 + inference_time: 309.0 + throughput: 3236.2459546925566 estimated_peak_memory_range: - min: 24576 - max: 4438632 + min: 12288 + max: 1641680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 101 - job_id: jmg94ov85 + job_id: jnp1qe1kg job_status: Passed 
torchscript_onnx_qnn: - inference_time: 386.0 - throughput: 2590.6735751295337 + inference_time: 395.0 + throughput: 2531.6455696202534 estimated_peak_memory_range: - min: 458752 - max: 3741368 + min: 290816 + max: 8822944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jqp4weq2g + job_id: jep23lzxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +362,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.352746Z' + timestamp: '2024-06-08T22:45:22Z' - torchscript_onnx_qnn: - inference_time: 520.0 - throughput: 1923.076923076923 + inference_time: 497.0 + throughput: 2012.0724346076458 estimated_peak_memory_range: min: 442368 max: 442368 @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jvgdv6w6g + job_id: jegnr3wv5 job_status: Passed torchscript_onnx_ort: - inference_time: 526.0 - throughput: 1901.1406844106464 + inference_time: 521.0 + throughput: 1919.3857965451057 estimated_peak_memory_range: - min: 4198400 - max: 4198400 + min: 5312512 + max: 5312512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 106 - job_id: jqpyd8e0p + job_id: j1gle3qep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.352791Z' + timestamp: '2024-06-08T22:45:27Z' diff --git a/qai_hub_models/models/mediapipe_hand/README.md b/qai_hub_models/models/mediapipe_hand/README.md index 8e327a5a..0e49e035 100644 --- a/qai_hub_models/models/mediapipe_hand/README.md +++ b/qai_hub_models/models/mediapipe_hand/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Hand-Detection can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MediaPipe Hands: On-device Real-time Hand Tracking](https://arxiv.org/abs/2006.10214) diff --git a/qai_hub_models/models/mediapipe_hand/export.py b/qai_hub_models/models/mediapipe_hand/export.py index df8302cd..0100ddad 100644 --- a/qai_hub_models/models/mediapipe_hand/export.py +++ b/qai_hub_models/models/mediapipe_hand/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_hand/perf.yaml b/qai_hub_models/models/mediapipe_hand/perf.yaml index 53e99545..67190c80 100644 --- a/qai_hub_models/models/mediapipe_hand/perf.yaml +++ b/qai_hub_models/models/mediapipe_hand/perf.yaml @@ -36,26 +36,26 @@ models: - name: MediaPipeHandDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 955.0 - throughput: 1047.1204188481674 + inference_time: 2260.0 + throughput: 442.4778761061947 estimated_peak_memory_range: min: 12288 - max: 2084224 + max: 11649504 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jn5q24ke5 + layers_on_cpu: 2 + total_layers: 150 + job_id: jwgoe394p job_status: Passed torchscript_onnx_qnn: - inference_time: 1012.0 - throughput: 988.1422924901186 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: - min: 16384 - max: 10188288 + min: 20480 + max: 21650176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jlpev6185 + job_id: jmg99wqqg job_status: Passed torchscript_onnx_ort: - inference_time: 1202.0 - throughput: 831.9467554076539 + inference_time: 1164.0 + throughput: 859.106529209622 estimated_peak_memory_range: - min: 40960 - max: 16016072 + min: 12288 + max: 18412096 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jegne1yjg + job_id: jqp4jv6lp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,28 +87,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.415401Z' + timestamp: '2024-06-08T22:46:02Z' - torchscript_onnx_tflite: - inference_time: 703.0 - throughput: 1422.475106685633 + inference_time: 1902.0 + throughput: 525.7623554153523 estimated_peak_memory_range: min: 12288 - max: 54231920 + max: 50595712 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jw561ojnp + layers_on_cpu: 2 + total_layers: 150 + job_id: j7gjke875 job_status: Passed torchscript_onnx_qnn: - inference_time: 724.0 - throughput: 1381.2154696132598 + inference_time: 722.0 + throughput: 1385.0415512465374 estimated_peak_memory_range: min: 802816 - max: 60616784 + max: 60773680 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: jz5w9yv4p + job_id: jvgd7omkg job_status: Passed torchscript_onnx_ort: - inference_time: 846.0 - throughput: 1182.033096926714 + inference_time: 837.0 + throughput: 1194.7431302270013 estimated_peak_memory_range: - min: 425984 - max: 36661744 + min: 323584 + max: 36752192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jqpyd8w0p + job_id: jo5mv31q5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,28 +140,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.415495Z' + timestamp: '2024-06-08T22:46:04Z' - torchscript_onnx_tflite: - inference_time: 954.0 - throughput: 1048.2180293501049 + inference_time: 2331.0 + throughput: 429.000429000429 estimated_peak_memory_range: - min: 16384 - max: 4494792 + min: 36864 + max: 2444200 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 152 + layers_on_npu: 148 layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 152 - job_id: jwgovd015 + layers_on_cpu: 2 + total_layers: 150 + job_id: jygzvr0zp job_status: Passed torchscript_onnx_qnn: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 1015.0 + throughput: 985.2216748768473 estimated_peak_memory_range: - min: 12288 - max: 11646912 + min: 806912 + max: 10668872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 197 - job_id: j0px10j8g + job_id: jvgd7omlg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.415555Z' + timestamp: '2024-06-08T22:46:01Z' - torchscript_onnx_qnn: - inference_time: 1058.0 - throughput: 945.179584120983 + inference_time: 1036.0 + throughput: 965.2509652509652 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: jvgdv696g + job_id: jmg99wqvg job_status: Passed torchscript_onnx_ort: - inference_time: 1250.0 - throughput: 800.0 + inference_time: 1182.0 + throughput: 846.0236886632825 estimated_peak_memory_range: - min: 905216 - max: 905216 + min: 704512 + max: 704512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 196 - job_id: j1p87jvq5 + job_id: jopr1emeg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,7 +216,7 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.415621Z' + timestamp: '2024-06-08T22:46:06Z' - name: MediaPipeHandLandmarkDetector performance_metrics: - torchscript_onnx_tflite: @@ -224,7 +224,7 @@ models: throughput: 829.8755186721992 estimated_peak_memory_range: min: 12288 - max: 1536064 + max: 2551752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1glkwz2p + job_id: j1pvzvn7g job_status: Passed torchscript_onnx_qnn: inference_time: 1299.0 throughput: 769.8229407236336 estimated_peak_memory_range: - min: 806912 - max: 9217032 + min: 802816 + max: 8940712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 210 - job_id: jygz7z94p + job_id: jnp1qemkg job_status: Passed torchscript_onnx_ort: - inference_time: 1591.0 - throughput: 628.5355122564425 + inference_time: 1521.0 + throughput: 657.4621959237344 estimated_peak_memory_range: - min: 28672 - max: 19720496 + min: 12288 + max: 143178688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jopryxqkg + job_id: j0pxey895 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.415714Z' + timestamp: '2024-06-08T22:46:03Z' - torchscript_onnx_tflite: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 903.0 + throughput: 1107.4197120708748 estimated_peak_memory_range: min: 12288 - max: 57199744 + max: 59093296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1p3mo3mg + job_id: jlpe4kn75 job_status: Passed torchscript_onnx_qnn: - inference_time: 966.0 - throughput: 1035.1966873706003 + inference_time: 962.0 + throughput: 1039.5010395010395 estimated_peak_memory_range: min: 802816 - max: 65744144 + max: 66542112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jnp18olng + job_id: jz5wmqrjg job_status: Passed torchscript_onnx_ort: - inference_time: 1136.0 - throughput: 880.2816901408451 + inference_time: 1121.0 + throughput: 892.0606601248885 estimated_peak_memory_range: - min: 724992 - max: 31554048 + min: 802816 + max: 30698880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: j2p0ro70p + job_id: jegnr3dm5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.415805Z' + timestamp: '2024-06-08T22:46:05Z' - torchscript_onnx_tflite: - inference_time: 1200.0 - throughput: 833.3333333333334 + inference_time: 1196.0 + throughput: 836.1204013377926 estimated_peak_memory_range: min: 28672 - max: 1701744 + max: 1643304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 159 - job_id: j1pvw2ozg + job_id: jz5wmqrzg job_status: Passed torchscript_onnx_qnn: - inference_time: 1302.0 - throughput: 768.0491551459294 + inference_time: 1318.0 + throughput: 758.7253414264036 estimated_peak_memory_range: - min: 819200 - max: 42331408 + min: 294912 + max: 52198264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 210 - job_id: jo5mz927p + job_id: jz57vx1r5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +362,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.415867Z' + timestamp: '2024-06-08T22:46:01Z' - torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1513.0 + throughput: 660.9385327164574 estimated_peak_memory_range: - min: 786432 - max: 786432 + min: 1150976 + max: 1150976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 209 - job_id: jz57down5 + job_id: jnp1qemlg job_status: Passed torchscript_onnx_ort: - inference_time: 1556.0 - throughput: 642.6735218508998 + inference_time: 1551.0 + throughput: 644.7453255963894 estimated_peak_memory_range: - min: 18542592 - max: 18542592 + min: 20062208 + max: 20062208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 209 - job_id: jn5q24oe5 + job_id: jep23lqmg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.415965Z' + timestamp: '2024-06-08T22:46:06Z' diff --git a/qai_hub_models/models/mediapipe_pose/README.md b/qai_hub_models/models/mediapipe_pose/README.md index 97b007ee..a63082b8 100644 --- a/qai_hub_models/models/mediapipe_pose/README.md +++ b/qai_hub_models/models/mediapipe_pose/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Pose-Estimation can be found [here](https://github.com/zmurez/MediaPipePyTorch/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [BlazePose: On-device Real-time Body Pose tracking](https://arxiv.org/abs/2006.10204) diff --git a/qai_hub_models/models/mediapipe_pose/export.py b/qai_hub_models/models/mediapipe_pose/export.py index 7bfbb700..4fd3fb91 100644 --- a/qai_hub_models/models/mediapipe_pose/export.py +++ b/qai_hub_models/models/mediapipe_pose/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_pose/perf.yaml b/qai_hub_models/models/mediapipe_pose/perf.yaml index 6bce9977..5bcb6fa5 100644 --- a/qai_hub_models/models/mediapipe_pose/perf.yaml +++ b/qai_hub_models/models/mediapipe_pose/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipePoseDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 824.0 - throughput: 1213.5922330097087 + inference_time: 850.0 + throughput: 1176.4705882352941 estimated_peak_memory_range: - min: 20480 - max: 1577288 + min: 32768 + max: 1863416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jwgovdq15 + job_id: j1p8wzd8p job_status: Passed torchscript_onnx_qnn: - inference_time: 876.0 - throughput: 1141.552511415525 + inference_time: 880.0 + throughput: 1136.3636363636363 estimated_peak_memory_range: - min: 212992 - max: 5551744 + min: 2273280 + max: 7352768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg94oem5 + job_id: jwgoe37dp job_status: Passed torchscript_onnx_ort: - inference_time: 1024.0 - throughput: 976.5625 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 
28672 - max: 9153824 + min: 471040 + max: 10697640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jopryxjkg + job_id: jvgd7oylg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.478689Z' + timestamp: '2024-06-08T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 595.0 - throughput: 1680.672268907563 + inference_time: 621.0 + throughput: 1610.3059581320451 estimated_peak_memory_range: - min: 25280512 - max: 65999520 + min: 61440 + max: 42407216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: j7gjl341p + job_id: jn5q93xmp job_status: Passed torchscript_onnx_qnn: - inference_time: 634.0 - throughput: 1577.2870662460568 + inference_time: 633.0 + throughput: 1579.778830963665 estimated_peak_memory_range: - min: 0 - max: 42973232 + min: 208896 + max: 48822992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgdv6l6g + job_id: j7gjke685 job_status: Passed torchscript_onnx_ort: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 772.0 + throughput: 1295.3367875647668 estimated_peak_memory_range: min: 212992 - max: 30255216 + max: 32138320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqpyd800p + job_id: jqp4jv7lp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.478764Z' + timestamp: '2024-06-08T22:46:44Z' - torchscript_onnx_tflite: - inference_time: 851.0 - throughput: 1175.0881316098707 + inference_time: 830.0 + throughput: 1204.8192771084337 estimated_peak_memory_range: min: 20480 - max: 1974952 + max: 1868176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 107 - job_id: jygz7zk4p + job_id: jw56qn97g job_status: Passed torchscript_onnx_qnn: - inference_time: 877.0 - throughput: 1140.2508551881415 + inference_time: 888.0 + throughput: 1126.126126126126 estimated_peak_memory_range: - min: 225280 - max: 15717856 + min: 16384 + max: 128786224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jo5mz9y7p + job_id: jmg99w7vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.478813Z' + timestamp: '2024-06-08T22:46:41Z' - torchscript_onnx_qnn: - inference_time: 1047.0 - throughput: 955.1098376313277 + inference_time: 1086.0 + throughput: 920.8103130755064 estimated_peak_memory_range: - min: 507904 - max: 507904 + min: 1765376 + max: 1765376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: jqp4we02g + job_id: jygzvrq6p job_status: Passed torchscript_onnx_ort: - inference_time: 1055.0 - throughput: 947.8672985781991 + inference_time: 1038.0 + throughput: 963.3911368015414 estimated_peak_memory_range: - 
min: 909312 - max: 909312 + min: 3256320 + max: 3256320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 139 - job_id: j1p87ryq5 + job_id: jo5mv37q5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.478866Z' + timestamp: '2024-06-08T22:46:46Z' - name: MediaPipePoseLandmarkDetector performance_metrics: - torchscript_onnx_tflite: - inference_time: 1281.0 - throughput: 780.64012490242 + inference_time: 1205.0 + throughput: 829.8755186721992 estimated_peak_memory_range: - min: 12288 - max: 2189288 + min: 200704 + max: 2517320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: j1pvw2xzg + job_id: jogkr3wo5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1294.0 - throughput: 772.7975270479135 + inference_time: 1306.0 + throughput: 765.6967840735069 estimated_peak_memory_range: - min: 12288 - max: 14061248 + min: 16384 + max: 13996512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jnp18oxng + job_id: j1pvzvymg job_status: Passed torchscript_onnx_ort: - inference_time: 1681.0 - throughput: 594.883997620464 + inference_time: 1647.0 + throughput: 607.1645415907711 estimated_peak_memory_range: - min: 81920 - max: 24691208 + min: 12288 + max: 25082496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jep2mon65 + job_id: jz57vxmr5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.478982Z' + timestamp: '2024-06-08T22:46:43Z' - torchscript_onnx_tflite: - inference_time: 885.0 - throughput: 1129.9435028248588 + inference_time: 864.0 + throughput: 1157.4074074074074 estimated_peak_memory_range: - min: 16384 - max: 88333840 + min: 12288 + max: 90560000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jlpev6385 + job_id: j1gle39lp job_status: Passed torchscript_onnx_qnn: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 802816 - max: 89518496 + max: 88829488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jz57do3n5 + job_id: jlpe4k005 job_status: Passed torchscript_onnx_ort: - inference_time: 1187.0 - throughput: 842.4599831508003 + inference_time: 1101.0 + throughput: 908.2652134423251 estimated_peak_memory_range: - min: 512000 - max: 41583520 + min: 802816 + max: 39260784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: j2p0r900p + job_id: j0pxeyq95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.479097Z' + timestamp: '2024-06-08T22:46:45Z' - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 
813.6696501220505 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: - min: 16384 - max: 2829328 + min: 24576 + max: 2611056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 230 - job_id: jz5w9yn4p + job_id: j1p3qelz5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1303.0 - throughput: 767.4597083653108 + inference_time: 1308.0 + throughput: 764.525993883792 estimated_peak_memory_range: - min: 802816 - max: 11490360 + min: 434176 + max: 15229872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 306 - job_id: jegne18jg + job_id: jnp1qeklg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +362,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.479173Z' + timestamp: '2024-06-08T22:46:41Z' - torchscript_onnx_qnn: - inference_time: 1494.0 - throughput: 669.3440428380187 + inference_time: 1463.0 + throughput: 683.526999316473 estimated_peak_memory_range: min: 786432 max: 786432 @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 305 - job_id: j0px1028g + job_id: jz5wmq0jg job_status: Passed torchscript_onnx_ort: - inference_time: 1596.0 - throughput: 626.5664160401003 + inference_time: 1886.0 + throughput: 530.2226935312831 estimated_peak_memory_range: - min: 7958528 - max: 7958528 + min: 19697664 + max: 19697664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 304 - job_id: jogky0xvp + job_id: jegnr34m5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.479256Z' + timestamp: '2024-06-08T22:46:46Z' diff --git a/qai_hub_models/models/mediapipe_selfie/README.md b/qai_hub_models/models/mediapipe_selfie/README.md index fd842e56..ec08249a 100644 --- a/qai_hub_models/models/mediapipe_selfie/README.md +++ b/qai_hub_models/models/mediapipe_selfie/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MediaPipe-Selfie-Segmentation can be found [here](https://github.com/google/mediapipe/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Image segmentation guide](https://developers.google.com/mediapipe/solutions/vision/image_segmenter/) diff --git a/qai_hub_models/models/mediapipe_selfie/export.py b/qai_hub_models/models/mediapipe_selfie/export.py index f137ad46..374affc3 100644 --- a/qai_hub_models/models/mediapipe_selfie/export.py +++ b/qai_hub_models/models/mediapipe_selfie/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mediapipe_selfie/perf.yaml b/qai_hub_models/models/mediapipe_selfie/perf.yaml index 913ec699..7f0d5fe1 100644 --- a/qai_hub_models/models/mediapipe_selfie/perf.yaml +++ b/qai_hub_models/models/mediapipe_selfie/perf.yaml @@ -36,11 +36,11 @@ models: - name: MediaPipe-Selfie-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 810.0 - throughput: 1234.567901234568 + inference_time: 806.0 + throughput: 1240.6947890818858 estimated_peak_memory_range: - min: 24576 - max: 1664768 + min: 12288 + max: 2385600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jw561m4np + job_id: jqpyv6l4p job_status: Passed torchscript_onnx_qnn: - inference_time: 792.0 - throughput: 1262.6262626262626 + inference_time: 784.0 + throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 28672 - max: 20649280 + min: 2240512 + max: 96205696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: j7gjlyn1p + job_id: jogkr31o5 job_status: Passed torchscript_onnx_ort: - inference_time: 1339.0 - throughput: 746.8259895444362 + inference_time: 1346.0 + throughput: 742.9420505200594 estimated_peak_memory_range: - min: 684032 - max: 56261672 + min: 786432 + max: 76785816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jmg942nm5 + job_id: j1p3qedz5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.579849Z' + timestamp: '2024-06-08T22:47:12Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 537.0 + throughput: 1862.1973929236499 estimated_peak_memory_range: - min: 16384 - max: 23767456 + min: 12288 + max: 24988016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: jwgovw615 + job_id: j2p0elwe5 job_status: Passed torchscript_onnx_qnn: inference_time: 512.0 throughput: 1953.125 estimated_peak_memory_range: min: 176128 - max: 42148896 + max: 45965632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jlpevxm85 
+ job_id: jn5q93nmp job_status: Passed torchscript_onnx_ort: - inference_time: 912.0 - throughput: 1096.4912280701753 + inference_time: 904.0 + throughput: 1106.1946902654868 estimated_peak_memory_range: min: 12288 - max: 19240112 + max: 20791344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jnp181zng + job_id: jwgoe3xdp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.579933Z' + timestamp: '2024-06-08T22:47:13Z' - torchscript_onnx_tflite: - inference_time: 810.0 - throughput: 1234.567901234568 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: min: 24576 - max: 1962504 + max: 1606304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 118 - job_id: j1pvwmkzg + job_id: j1p8wzn8p job_status: Passed torchscript_onnx_qnn: - inference_time: 783.0 - throughput: 1277.139208173691 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 806912 - max: 8305720 + min: 24576 + max: 12402272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jz5w9z64p + job_id: jw56qnx7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.580005Z' + timestamp: '2024-06-08T22:47:11Z' - torchscript_onnx_qnn: - inference_time: 1023.0 - throughput: 977.5171065493646 + inference_time: 920.0 + throughput: 1086.9565217391305 estimated_peak_memory_range: min: 786432 max: 786432 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 138 - job_id: jygz7yd4p + job_id: j1gle3dlp job_status: Passed torchscript_onnx_ort: - inference_time: 1373.0 - throughput: 728.3321194464676 + inference_time: 1362.0 + throughput: 734.2143906020558 estimated_peak_memory_range: - min: 1335296 - max: 1335296 + min: 2674688 + max: 2674688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 140 - job_id: jvgdv416g + job_id: j1pvzv8mg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.580058Z' + timestamp: '2024-06-08T22:47:14Z' diff --git a/qai_hub_models/models/midas/README.md b/qai_hub_models/models/midas/README.md index dbf100cc..8eed4994 100644 --- a/qai_hub_models/models/midas/README.md +++ b/qai_hub_models/models/midas/README.md @@ -18,6 +18,11 @@ a hosted Qualcomm® device. ## Example & Usage +Install the package via pip: +```bash +pip install "qai_hub_models[midas]" +``` + Once installed, run the following simple CLI demo: @@ -43,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Midas-V2 can be found [here](https://github.com/isl-org/MiDaS/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer](https://arxiv.org/abs/1907.01341v3) diff --git a/qai_hub_models/models/midas/export.py b/qai_hub_models/models/midas/export.py index 61e5504a..adcb9fbe 100644 --- a/qai_hub_models/models/midas/export.py +++ b/qai_hub_models/models/midas/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/midas/model.py b/qai_hub_models/models/midas/model.py index 3f6b1d53..669ecdd0 100644 --- a/qai_hub_models/models/midas/model.py +++ b/qai_hub_models/models/midas/model.py @@ -4,15 +4,35 @@ # --------------------------------------------------------------------- from __future__ import annotations +import sys + import torch +from qai_hub_models.utils.asset_loaders import ( + CachedWebModelAsset, + SourceAsRoot, + find_replace_in_repo, + load_torch, + tmp_os_env, + wipe_sys_modules, +) from qai_hub_models.utils.base_model import BaseModel from qai_hub_models.utils.image_processing import normalize_image_torchvision from qai_hub_models.utils.input_spec import InputSpec MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 1 -DEFAULT_WEIGHTS = "MiDaS_small" +MODEL_ASSET_VERSION = 2 + +SOURCE_REPO = "https://github.com/isl-org/MiDaS/" +REPO_COMMIT = "bdc4ed64c095e026dc0a2f17cabb14d58263decb" +DEFAULT_WEIGHTS = CachedWebModelAsset( + "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt", + MODEL_ID, + MODEL_ASSET_VERSION, + "midas_v21_small_256.pt", +) +DEFAULT_HEIGHT = 256 +DEFAULT_WIDTH = 256 class Midas(BaseModel): @@ -21,21 +41,76 @@ class Midas(BaseModel): def __init__( self, model: torch.nn.Module, + height: int = DEFAULT_HEIGHT, + width: int = DEFAULT_WIDTH, normalize_input: bool = True, ) -> None: super().__init__() self.model = model self.normalize_input = normalize_input + self.height = height + self.width = width @classmethod - def from_pretrained(cls, weights: str = DEFAULT_WEIGHTS) -> Midas: - model = torch.hub.load("intel-isl/MiDaS", weights).eval() + def from_pretrained( + cls, + weights: str = DEFAULT_WEIGHTS, + height: int = DEFAULT_HEIGHT, + width: int = DEFAULT_WIDTH, + ) -> Midas: + with SourceAsRoot( + SOURCE_REPO, + REPO_COMMIT, + MODEL_ID, + MODEL_ASSET_VERSION, + keep_sys_modules=True, + ) as repo_root: + # Temporarily set torch home to the local repo so modules get cloned + # locally and we can modify their code. + with tmp_os_env( + {"TORCH_HOME": repo_root, "height": str(height), "width": str(width)} + ): + # Load the dependent module first to ensure the code gets cloned. + # Then wipe the cached modules and make necessary code changes. 
+        torch.hub.load(
+            "rwightman/gen-efficientnet-pytorch",
+            "tf_efficientnet_lite3",
+            pretrained=False,
+            skip_validation=True,
+        )
+        wipe_sys_modules(sys.modules["geffnet"])
+
+        # The default implementation creates the self.pad layer within the
+        # forward function itself, which makes it untraceable by aimet.
+        find_replace_in_repo(
+            repo_root,
+            "hub/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py",
+            "self.pad = None",
+            "self.pad = nn.ZeroPad2d(_same_pad_arg((int(os.environ['height']), int(os.environ['width'])), self.weight.shape[-2:], self.stride, self.dilation))",
+        )
+        find_replace_in_repo(
+            repo_root,
+            "hub/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py",
+            "import math",
+            "import math; import os",
+        )
+
+        from hubconf import MiDaS_small
+
+        model = MiDaS_small(pretrained=False)
+        weights = load_torch(weights)
+        model.load_state_dict(weights)
         return cls(model)

     @staticmethod
-    def get_input_spec(height: int = 256, width: int = 256) -> InputSpec:
+    def get_input_spec(
+        height: int = DEFAULT_HEIGHT, width: int = DEFAULT_WIDTH
+    ) -> InputSpec:
         return {"image": ((1, 3, height, width), "float32")}

+    def _get_input_spec_for_instance(self) -> InputSpec:
+        return self.__class__.get_input_spec(self.height, self.width)
+
     def forward(self, image):
         """
         Runs the model on an image tensor and returns a tensor of depth estimates
diff --git a/qai_hub_models/models/midas/perf.yaml b/qai_hub_models/models/midas/perf.yaml
index 1d16582c..047b62cc 100644
--- a/qai_hub_models/models/midas/perf.yaml
+++ b/qai_hub_models/models/midas/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: Midas-V2
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 3425.0
-      throughput: 291.97080291970804
+      inference_time: 3428.0
+      throughput: 291.71528588098016
       estimated_peak_memory_range:
-        min: 16384
-        max: 2561720
+        min: 12288
+        max: 2878504
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -48,14 +48,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jo5mzkx7p
+      job_id: jlpe4kq05
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 3375.0
-      throughput: 296.2962962962963
+      inference_time: 3372.0
+      throughput: 296.55990510083035
       estimated_peak_memory_range:
-        min: 16384
-        max: 18697592
+        min: 806912
+        max: 11534808
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -63,14 +63,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jep2mdy65
+      job_id: jmg99wrvg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 3586.0
-      throughput: 278.8622420524261
+      inference_time: 3451.0
+      throughput: 289.77108084613155
       estimated_peak_memory_range:
         min: 12288
-        max: 158594312
+        max: 177641176
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -78,7 +78,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jn5q216e5
+      job_id: jqp4jvzlp
       job_status: Passed
     reference_device_info:
       name: Samsung Galaxy S23
@@ -87,13 +87,13 @@ models:
       os_name: Android
       manufacturer: Samsung
       chipset: Snapdragon® 8 Gen 2
-    timestamp: '2024-05-29T18:59:41.614171Z'
+    timestamp: '2024-06-08T22:47:43Z'
   - torchscript_onnx_tflite:
-      inference_time: 2439.0
-      throughput: 410.0041000410004
+      inference_time: 2407.0
+      throughput: 415.45492314083924
       estimated_peak_memory_range:
-        min: 16384
-        max: 78764672
+        min: 12288
+        max: 82857536
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -101,14 +101,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jegneqvjg
+      job_id: jygzvr66p
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 2405.0
-      throughput: 415.8004158004158
+      inference_time: 2404.0
+      throughput: 415.97337770382694
       estimated_peak_memory_range:
         min: 802816
-        max: 64511072
+        max: 65062640
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -116,14 +116,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j2p0r9z0p
+      job_id: jnp1qe9lg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 2424.0
-      throughput: 412.54125412541254
+      inference_time: 2416.0
+      throughput: 413.9072847682119
       estimated_peak_memory_range:
-        min: 802816
-        max: 41121248
+        min: 389120
+        max: 38273760
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -131,7 +131,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j1glk8v2p
+      job_id: j0pxeyw95
       job_status: Passed
     reference_device_info:
       name: Samsung Galaxy S24
@@ -140,13 +140,13 @@ models:
       os_name: Android
       manufacturer: Samsung
       chipset: Snapdragon® 8 Gen 3
-    timestamp: '2024-05-29T18:59:41.614261Z'
+    timestamp: '2024-06-08T22:47:44Z'
   - torchscript_onnx_tflite:
-      inference_time: 3433.0
-      throughput: 291.29041654529567
+      inference_time: 3435.0
+      throughput: 291.1208151382824
       estimated_peak_memory_range:
         min: 16384
-        max: 2080552
+        max: 2408992
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -154,14 +154,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 139
-      job_id: jopryd3kg
+      job_id: jz5wmqkjg
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 3365.0
-      throughput: 297.1768202080238
+      inference_time: 3369.0
+      throughput: 296.8239833778569
       estimated_peak_memory_range:
-        min: 12288
-        max: 18671464
+        min: 802816
+        max: 11302408
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -169,7 +169,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jogky0evp
+      job_id: jz57vxqr5
       job_status: Passed
     reference_device_info:
       name: QCS8550 (Proxy)
@@ -178,10 +178,10 @@ models:
       os_name: Android
       manufacturer: Qualcomm
       chipset: Qcs8550
-    timestamp: '2024-05-29T18:59:41.614318Z'
+    timestamp: '2024-06-08T22:47:42Z'
   - torchscript_onnx_qnn:
-      inference_time: 3591.0
-      throughput: 278.473962684489
+      inference_time: 3529.0
+      throughput: 283.36639274582035
       estimated_peak_memory_range:
         min: 786432
         max: 786432
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -192,14 +192,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: j1p87rqq5
+      job_id: jvgd7oklg
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 3422.0
-      throughput: 292.22676797194623
+      inference_time: 3447.0
+      throughput: 290.1073397156948
       estimated_peak_memory_range:
-        min: 34041856
-        max: 34041856
+        min: 9965568
+        max: 9965568
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -207,7 +207,7 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 199
-      job_id: jw561mynp
+      job_id: jo5mv3jq5
       job_status: Passed
     reference_device_info:
       name: Snapdragon X Elite CRD
@@ -216,4 +216,4 @@ models:
       os_name: Windows
       manufacturer: Qualcomm
       chipset: Snapdragon® X Elite
-    timestamp: '2024-05-29T18:59:41.614384Z'
+    timestamp: '2024-06-08T22:47:45Z'
diff --git a/qai_hub_models/models/midas/requirements.txt b/qai_hub_models/models/midas/requirements.txt
new file mode 100644
index 00000000..70fad942
--- /dev/null
+++ b/qai_hub_models/models/midas/requirements.txt
@@ -0,0 +1 @@
+timm==1.0.3
diff --git a/qai_hub_models/models/mnasnet05/README.md b/qai_hub_models/models/mnasnet05/README.md
index f17444f4..ab0d56a1 100644
--- a/qai_hub_models/models/mnasnet05/README.md
+++ b/qai_hub_models/models/mnasnet05/README.md
@@ -43,7 +43,7 @@
 script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of MNASNet05 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [MnasNet: Platform-Aware Neural Architecture Search for Mobile](https://arxiv.org/abs/1807.11626)
diff --git a/qai_hub_models/models/mnasnet05/evaluate.py b/qai_hub_models/models/mnasnet05/evaluate.py
new file mode 100644
index 00000000..c6de56aa
--- /dev/null
+++ b/qai_hub_models/models/mnasnet05/evaluate.py
@@ -0,0 +1,54 @@
+# ---------------------------------------------------------------------
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY.
+
+
+from __future__ import annotations
+
+import warnings
+
+import qai_hub as hub
+
+from qai_hub_models.models.mnasnet05 import MODEL_ID, Model
+from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs
+from qai_hub_models.utils.evaluate import evaluate_on_dataset
+from qai_hub_models.utils.inference import compile_model_from_args
+
+SUPPORTED_DATASETS = ["imagenette", "imagenet"]
+
+
+def main():
+    warnings.filterwarnings("ignore")
+    parser = evaluate_parser(
+        model_cls=Model,
+        default_split_size=2500,
+        supported_datasets=SUPPORTED_DATASETS,
+    )
+    args = parser.parse_args()
+    args.device = None
+
+    if args.hub_model_id is not None:
+        hub_model = hub.get_model(args.hub_model_id)
+    else:
+        hub_model = compile_model_from_args(
+            MODEL_ID, args, get_model_kwargs(Model, vars(args))
+        )
+    hub_device = get_hub_device(None, args.chipset)
+    torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args)))
+    evaluate_on_dataset(
+        hub_model,
+        torch_model,
+        hub_device,
+        args.dataset_name,
+        args.split_size,
+        args.num_samples,
+        args.seed,
+        args.profile_options,
+        args.use_cache,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/qai_hub_models/models/mnasnet05/export.py b/qai_hub_models/models/mnasnet05/export.py
index 389bfe46..ed49880c 100644
--- a/qai_hub_models/models/mnasnet05/export.py
+++ b/qai_hub_models/models/mnasnet05/export.py
@@ -188,7 +188,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/mnasnet05/info.yaml b/qai_hub_models/models/mnasnet05/info.yaml
index c3ce91e0..1f87a21c 100644
--- a/qai_hub_models/models/mnasnet05/info.yaml
+++ b/qai_hub_models/models/mnasnet05/info.yaml
@@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License
 dataset:
   - imagenet-1k
   - imagenet-22k
+labels_file: imagenet_labels.txt
diff --git a/qai_hub_models/models/mnasnet05/perf.yaml b/qai_hub_models/models/mnasnet05/perf.yaml
index 7f636ea6..0ca342e3 100644
--- a/qai_hub_models/models/mnasnet05/perf.yaml
+++ b/qai_hub_models/models/mnasnet05/perf.yaml
@@ -36,11 +36,11 @@ models:
 -
name: MNASNet05 performance_metrics: - torchscript_onnx_tflite: - inference_time: 781.0 - throughput: 1280.4097311139565 + inference_time: 782.0 + throughput: 1278.772378516624 estimated_peak_memory_range: - min: 24576 - max: 1909392 + min: 20480 + max: 1961704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlyv1p + job_id: jlpe4kj05 job_status: Passed torchscript_onnx_qnn: inference_time: 826.0 throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 12288 - max: 35312208 + min: 618496 + max: 5537568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jz5w9ze4p + job_id: jmg99w6vg job_status: Passed torchscript_onnx_ort: - inference_time: 763.0 - throughput: 1310.615989515072 + inference_time: 798.0 + throughput: 1253.1328320802006 estimated_peak_memory_range: min: 12288 - max: 18924944 + max: 155086488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jz5w9zezp + job_id: jqp4jvqlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.648776Z' + timestamp: '2024-06-08T22:49:16Z' - torchscript_onnx_tflite: - inference_time: 523.0 - throughput: 1912.0458891013384 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 16384 - max: 45982896 + min: 12288 + max: 46076672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevxd85 + job_id: jygzvr16p job_status: Passed torchscript_onnx_qnn: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 564.0 + throughput: 1773.049645390071 estimated_peak_memory_range: min: 0 - max: 39385456 + max: 41703392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jmg942lm5 + job_id: jnp1qerlg job_status: Passed torchscript_onnx_ort: - inference_time: 551.0 - throughput: 1814.8820326678765 + inference_time: 560.0 + throughput: 1785.7142857142858 estimated_peak_memory_range: - min: 618496 - max: 28569088 + min: 31727616 + max: 59957408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jmg942lq5 + job_id: j0pxeyv95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.648842Z' + timestamp: '2024-06-08T22:49:17Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 773.0 + throughput: 1293.6610608020699 estimated_peak_memory_range: min: 12288 - max: 1638504 + max: 159861568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y34p + job_id: jz5wmqjjg job_status: Passed torchscript_onnx_qnn: - inference_time: 823.0 - throughput: 1215.0668286755772 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: - min: 28672 - max: 34400464 + min: 16384 + max: 14027976 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jvgdv4x6g + job_id: jz57vxzr5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.648882Z' + timestamp: '2024-06-08T22:49:15Z' - torchscript_onnx_qnn: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 946.0 + throughput: 1057.0824524312895 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 606208 + max: 606208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jnp1814ng + job_id: jvgd7ojlg job_status: Passed torchscript_onnx_ort: - inference_time: 819.0 - throughput: 1221.001221001221 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: - min: 19140608 - max: 19140608 + min: 18001920 + max: 18001920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 104 - job_id: jnp1814kg + job_id: jo5mv3rq5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.648928Z' + timestamp: '2024-06-08T22:49:18Z' diff --git a/qai_hub_models/models/mobilenet_v2/README.md b/qai_hub_models/models/mobilenet_v2/README.md index 4c9f4616..7426d634 100644 --- a/qai_hub_models/models/mobilenet_v2/README.md +++ b/qai_hub_models/models/mobilenet_v2/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2 can be found [here](https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2/evaluate.py b/qai_hub_models/models/mobilenet_v2/evaluate.py new file mode 100644 index 00000000..63c5f4a4 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v2 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v2/export.py b/qai_hub_models/models/mobilenet_v2/export.py index 4162e418..3201e32a 100644 --- a/qai_hub_models/models/mobilenet_v2/export.py +++ b/qai_hub_models/models/mobilenet_v2/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2/info.yaml b/qai_hub_models/models/mobilenet_v2/info.yaml index 693da4bb..181e3f7b 100644 --- a/qai_hub_models/models/mobilenet_v2/info.yaml +++ b/qai_hub_models/models/mobilenet_v2/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v2/perf.yaml b/qai_hub_models/models/mobilenet_v2/perf.yaml index dfb984fb..bdd64620 100644 --- a/qai_hub_models/models/mobilenet_v2/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 938.0 - throughput: 1066.0980810234541 + inference_time: 940.0 + throughput: 1063.8297872340424 estimated_peak_memory_range: - min: 16384 - max: 1921016 + min: 57344 + max: 1721784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz57dnyq5 + job_id: jopr1ekeg job_status: Passed torchscript_onnx_qnn: - inference_time: 1263.0 - throughput: 791.7656373713381 + inference_time: 1266.0 + throughput: 789.8894154818325 estimated_peak_memory_range: min: 622592 - max: 149245264 + max: 53135336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jo5mzknyp + job_id: j2p0elye5 job_status: Passed torchscript_onnx_ort: - inference_time: 974.0 - throughput: 1026.694045174538 + inference_time: 938.0 + throughput: 1066.0980810234541 estimated_peak_memory_range: - min: 12288 - max: 34226088 + min: 16384 + max: 21567360 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jqpyd2zrp + job_id: j1gle3nlp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.683566Z' + timestamp: '2024-06-08T22:49:43Z' - torchscript_onnx_tflite: - inference_time: 642.0 - throughput: 1557.632398753894 + inference_time: 643.0 + throughput: 1555.2099533437015 estimated_peak_memory_range: - min: 12288 - max: 56648848 + min: 0 + max: 58244480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jqp4w4lqg + job_id: jep23l8mg job_status: Passed torchscript_onnx_qnn: - inference_time: 829.0 - throughput: 1206.2726176115802 + inference_time: 826.0 + throughput: 1210.6537530266344 estimated_peak_memory_range: min: 618496 - max: 39436848 + max: 40424432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jegneq0vg + job_id: j1p8wzo8p job_status: Passed torchscript_onnx_ort: - inference_time: 667.0 - throughput: 1499.2503748125937 + inference_time: 666.0 + throughput: 1501.5015015015015 estimated_peak_memory_range: - min: 618496 - max: 26671744 + min: 487424 + max: 27269952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j2p0r942p + job_id: jw56qn67g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.683632Z' + timestamp: '2024-06-08T22:49:44Z' - torchscript_onnx_tflite: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 941.0 + throughput: 1062.6992561105208 estimated_peak_memory_range: - min: 28672 - max: 2045656 + min: 20480 + max: 1483664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j0px1rkjg + job_id: jqpyv6e4p job_status: Passed torchscript_onnx_qnn: - inference_time: 1270.0 - throughput: 787.4015748031496 + inference_time: 1272.0 + throughput: 786.1635220125786 estimated_peak_memory_range: - min: 24576 - max: 51022400 + min: 618496 + max: 41687968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jep2mdxx5 + job_id: jn5q938mp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.683673Z' + timestamp: '2024-06-08T22:49:42Z' - torchscript_onnx_qnn: - inference_time: 1557.0 - throughput: 642.2607578676943 + inference_time: 1555.0 + throughput: 643.0868167202573 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1355776 + max: 1355776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: jopryd6vg + job_id: jogkr3zo5 job_status: Passed torchscript_onnx_ort: - inference_time: 1003.0 - throughput: 997.0089730807578 + inference_time: 987.0 + throughput: 1013.1712259371834 estimated_peak_memory_range: - min: 4685824 - max: 4685824 + min: 5607424 + max: 5607424 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 105 - job_id: j1p87r2z5 + job_id: j1p3qekz5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.683718Z' + timestamp: '2024-06-08T22:49:45Z' diff --git a/qai_hub_models/models/mobilenet_v2_quantized/README.md b/qai_hub_models/models/mobilenet_v2_quantized/README.md index e8db3017..c2ca082f 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v2_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v2-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py b/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py new file mode 100644 index 00000000..76dd0581 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v2_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v2_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v2_quantized/export.py b/qai_hub_models/models/mobilenet_v2_quantized/export.py index 231631ce..b025f312 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v2_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml index 302fcc0a..973d4b15 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml index 499836d3..f7621f54 100644 --- a/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v2_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: MobileNet-v2-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 294.0 - throughput: 3401.360544217687 + inference_time: 291.0 + throughput: 3436.426116838488 estimated_peak_memory_range: - min: 12288 - max: 1579192 + min: 53248 + max: 1718392 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jn5q21075 + job_id: j1pvzv3mg job_status: Passed torchscript_onnx_qnn: - inference_time: 658.0 - throughput: 1519.756838905775 + inference_time: 647.0 + throughput: 1545.595054095827 estimated_peak_memory_range: - min: 12288 - max: 7011992 + min: 45056 + max: 16933008 
primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j1pvwmq7g + job_id: jmg99wvvg job_status: Passed torchscript_onnx_ort: - inference_time: 640.0 - throughput: 1562.5 + inference_time: 549.0 + throughput: 1821.4936247723133 estimated_peak_memory_range: - min: 32768 - max: 53423304 + min: 12288 + max: 22837192 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jmg9420q5 + total_layers: 74 + job_id: jmg99w1lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.718178Z' + timestamp: '2024-06-08T22:50:23Z' - torchscript_onnx_tflite: - inference_time: 207.0 - throughput: 4830.917874396136 + inference_time: 215.0 + throughput: 4651.162790697675 estimated_peak_memory_range: min: 12288 - max: 37431712 + max: 38045216 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1glk84ep + job_id: j7gjkex85 job_status: Passed torchscript_onnx_qnn: - inference_time: 475.0 - throughput: 2105.2631578947367 + inference_time: 474.0 + throughput: 2109.7046413502107 estimated_peak_memory_range: min: 163840 - max: 35567776 + max: 38345936 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlyd7p + job_id: jnp1qe0lg job_status: Passed torchscript_onnx_ort: - inference_time: 485.0 - throughput: 2061.855670103093 + inference_time: 395.0 + throughput: 2531.6455696202534 estimated_peak_memory_range: - min: 0 - max: 21634080 + min: 12288 + max: 23651472 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jnp1812kg + total_layers: 74 + job_id: jnp1qel2g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.718236Z' + timestamp: '2024-06-08T22:50:24Z' - torchscript_onnx_tflite: - inference_time: 317.0 - throughput: 3154.5741324921137 + inference_time: 301.0 + throughput: 3322.2591362126245 estimated_peak_memory_range: min: 12288 - max: 1549360 + max: 1685448 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jw561m2vp + job_id: jlpe4k905 job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 654.0 + throughput: 1529.051987767584 estimated_peak_memory_range: - min: 24576 - max: 5875912 + min: 16384 + max: 123157128 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y2zp + job_id: jz5wmqv6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.718274Z' + timestamp: '2024-06-08T22:50:22Z' - torchscript_onnx_tflite: - inference_time: 825.0 - throughput: 1212.121212121212 + inference_time: 850.0 + throughput: 1176.4705882352941 estimated_peak_memory_range: min: 12288 - max: 23357440 + 
max: 24025456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: j1p3m7nxg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1469.0 - throughput: 680.7351940095303 - estimated_peak_memory_range: - min: 315392 - max: 35338656 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 71 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 71 - job_id: jz5w9zwzp + job_id: jygzvre6p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.718310Z' + timestamp: '2024-06-08T22:50:16Z' - torchscript_onnx_tflite: - inference_time: 7302.0 - throughput: 136.9487811558477 + inference_time: 7601.0 + throughput: 131.56163662675965 estimated_peak_memory_range: - min: 118784 - max: 6739360 + min: 253952 + max: 8158832 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 0 total_layers: 72 - job_id: jwgovwz45 + job_id: jz5wmqojg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.718332Z' + timestamp: '2024-06-08T22:50:17Z' - torchscript_onnx_qnn: - inference_time: 771.0 - throughput: 1297.0168612191958 + inference_time: 740.0 + throughput: 1351.3513513513512 estimated_peak_memory_range: - min: 610304 - max: 610304 + min: 696320 + max: 696320 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevxo75 + job_id: jvgd7owlg job_status: Passed torchscript_onnx_ort: - inference_time: 762.0 - throughput: 1312.3359580052493 + inference_time: 554.0 + throughput: 1805.0541516245487 estimated_peak_memory_range: - min: 19554304 - max: 19554304 + min: 20283392 + max: 20283392 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 77 + layers_on_npu: 74 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 77 - job_id: jvgdv4nkg + total_layers: 74 + job_id: jvgd7o9eg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.718370Z' + timestamp: '2024-06-08T22:50:25Z' diff --git a/qai_hub_models/models/mobilenet_v3_large/README.md b/qai_hub_models/models/mobilenet_v3_large/README.md index bf675b22..3084f4fb 100644 --- a/qai_hub_models/models/mobilenet_v3_large/README.md +++ b/qai_hub_models/models/mobilenet_v3_large/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Large can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_large/evaluate.py b/qai_hub_models/models/mobilenet_v3_large/evaluate.py new file mode 100644 index 00000000..919ac111 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_large import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large/export.py b/qai_hub_models/models/mobilenet_v3_large/export.py index 40b17a01..da4d660d 100644 --- a/qai_hub_models/models/mobilenet_v3_large/export.py +++ b/qai_hub_models/models/mobilenet_v3_large/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large/info.yaml b/qai_hub_models/models/mobilenet_v3_large/info.yaml index be6173d9..d276b6db 100644 --- a/qai_hub_models/models/mobilenet_v3_large/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_large/perf.yaml b/qai_hub_models/models/mobilenet_v3_large/perf.yaml index c0d437b8..8ea50f02 100644 --- a/qai_hub_models/models/mobilenet_v3_large/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v3-Large performance_metrics: - torchscript_onnx_tflite: - inference_time: 1003.0 - throughput: 
997.0089730807578 + inference_time: 999.0 + throughput: 1001.001001001001 estimated_peak_memory_range: - min: 24576 - max: 17861680 + min: 16384 + max: 1600024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jqp4w4nqg + job_id: jqp4jvovp job_status: Passed torchscript_onnx_qnn: - inference_time: 1040.0 - throughput: 961.5384615384615 + inference_time: 1048.0 + throughput: 954.1984732824427 estimated_peak_memory_range: - min: 16384 - max: 57725416 + min: 647168 + max: 48048184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jegneqlvg + job_id: jegnr3yr5 job_status: Passed torchscript_onnx_ort: - inference_time: 1026.0 - throughput: 974.6588693957115 + inference_time: 1039.0 + throughput: 962.4639076034649 estimated_peak_memory_range: min: 12288 - max: 51031736 + max: 82696432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: j1p87r0z5 + job_id: j2p0elq65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.761794Z' + timestamp: '2024-06-08T22:50:50Z' - torchscript_onnx_tflite: - inference_time: 678.0 - throughput: 1474.9262536873157 + inference_time: 703.0 + throughput: 1422.475106685633 estimated_peak_memory_range: min: 12288 - max: 61079392 + max: 62391952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: j0px1r9jg + job_id: j0pxeyj15 job_status: Passed torchscript_onnx_qnn: - inference_time: 716.0 - throughput: 1396.6480446927374 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 0 - max: 49042560 + min: 618496 + max: 51941056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jopryd8vg + job_id: jopr1eq9g job_status: Passed torchscript_onnx_ort: - inference_time: 738.0 - throughput: 1355.0135501355014 + inference_time: 713.0 + throughput: 1402.5245441795232 estimated_peak_memory_range: - min: 12288 - max: 28525824 + min: 618496 + max: 29120336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jogky07yp + job_id: j1p8wz9xp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.761876Z' + timestamp: '2024-06-08T22:50:51Z' - torchscript_onnx_tflite: - inference_time: 1002.0 - throughput: 998.003992015968 + inference_time: 1001.0 + throughput: 999.000999000999 estimated_peak_memory_range: - min: 28672 - max: 2042160 + min: 45056 + max: 1507408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 136 - job_id: jo5mzkeyp + job_id: jo5mv32w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1035.0 - throughput: 966.1835748792271 + inference_time: 1042.0 + throughput: 959.6928982725528 estimated_peak_memory_range: - min: 618496 - max: 26767128 + min: 626688 + max: 69049656 primary_compute_unit: NPU precision: fp16 layer_info: 
@@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: j2p0r932p + job_id: jqpyv6w7p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.761928Z' + timestamp: '2024-06-08T22:50:49Z' - torchscript_onnx_qnn: - inference_time: 1213.0 - throughput: 824.4023083264633 + inference_time: 1199.0 + throughput: 834.0283569641368 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 144 - job_id: jqpyd2rrp + job_id: jep23l64g job_status: Passed torchscript_onnx_ort: - inference_time: 1116.0 - throughput: 896.0573476702509 + inference_time: 1086.0 + throughput: 920.8103130755064 estimated_peak_memory_range: - min: 62279680 - max: 62279680 + min: 51040256 + max: 51040256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 162 - job_id: jn5q21e75 + job_id: jogkr3n25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.761984Z' + timestamp: '2024-06-08T22:50:52Z' diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md index b7ace7ce..1feab19d 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/README.md +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Large-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py new file mode 100644 index 00000000..39314070 --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_large_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py index ec90b231..7948d791 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/export.py +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml index 9232ebd6..7d7507ae 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml index a7a39b53..b5cebb9b 100644 --- a/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_large_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: MobileNet-v3-Large-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 359.0 - throughput: 2785.515320334262 + inference_time: 371.0 + throughput: 2695.4177897574123 estimated_peak_memory_range: - min: 12288 - max: 1354904 + min: 16384 + max: 1268000 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jwgovwk45 + job_id: j1gle3z8p job_status: Passed torchscript_onnx_qnn: - inference_time: 628.0 - throughput: 1592.3566878980891 + inference_time: 622.0 + throughput: 1607.717041800643 
estimated_peak_memory_range: min: 16384 - max: 51512504 + max: 12184136 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9z2zp + job_id: j7gjkemx5 job_status: Passed torchscript_onnx_ort: - inference_time: 5349.0 - throughput: 186.9508319312021 + inference_time: 5186.0 + throughput: 192.8268414963363 estimated_peak_memory_range: - min: 18890752 - max: 36685080 + min: 18886656 + max: 272750360 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: jqp4w4kqg + total_layers: 171 + job_id: jmg99welg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.796254Z' + timestamp: '2024-06-08T22:51:35Z' - torchscript_onnx_tflite: inference_time: 255.0 throughput: 3921.5686274509803 estimated_peak_memory_range: min: 12288 - max: 47702144 + max: 48279952 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j1pvwm07g + job_id: jw56qnj0g job_status: Passed torchscript_onnx_qnn: - inference_time: 456.0 - throughput: 2192.9824561403507 + inference_time: 451.0 + throughput: 2217.2949002217297 estimated_peak_memory_range: min: 163840 - max: 45563744 + max: 50970896 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg942jq5 + job_id: jlpe4k115 job_status: Passed torchscript_onnx_ort: - inference_time: 4341.0 - throughput: 230.361667818475 + inference_time: 4385.0 + throughput: 228.05017103762827 estimated_peak_memory_range: - min: 16400384 - max: 54777648 + min: 17133568 + max: 61050864 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: j0px1rnjg + total_layers: 171 + job_id: jnp1qex2g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.796335Z' + timestamp: '2024-06-08T22:51:36Z' - torchscript_onnx_tflite: inference_time: 353.0 throughput: 2832.8611898016998 estimated_peak_memory_range: min: 12288 - max: 1793632 + max: 2106960 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: j7gjlyz7p + job_id: j1p3qe3l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 626.0 + throughput: 1597.444089456869 estimated_peak_memory_range: - min: 16384 - max: 15890856 + min: 184320 + max: 6302512 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv4ekg + job_id: jz5wmqn6g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.796386Z' + timestamp: '2024-06-08T22:51:34Z' - torchscript_onnx_tflite: - inference_time: 1160.0 - throughput: 862.0689655172414 + inference_time: 1170.0 + throughput: 854.7008547008547 estimated_peak_memory_range: min: 12288 - max: 28257056 + 
max: 28920160 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jlpevxe75 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1608.0 - throughput: 621.8905472636816 - estimated_peak_memory_range: - min: 12288 - max: 49331408 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 126 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 126 - job_id: jz57dn0q5 + job_id: jwgoe30xp job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:41.796435Z' + timestamp: '2024-06-08T22:51:29Z' - torchscript_onnx_tflite: - inference_time: 6893.0 - throughput: 145.0747134774409 + inference_time: 6878.0 + throughput: 145.39110206455365 estimated_peak_memory_range: min: 45056 - max: 7248032 + max: 2149272 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 135 - job_id: jygz7yozp + job_id: j1pvzvojg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:41.796463Z' + timestamp: '2024-06-08T22:51:30Z' - torchscript_onnx_qnn: - inference_time: 713.0 - throughput: 1402.5245441795232 + inference_time: 716.0 + throughput: 1396.6480446927374 estimated_peak_memory_range: - min: 569344 - max: 569344 + min: 643072 + max: 643072 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp181ykg + job_id: jygzvr9kp job_status: Passed torchscript_onnx_ort: - inference_time: 4668.0 - throughput: 214.22450728363324 + inference_time: 4701.0 + throughput: 212.72069772388852 estimated_peak_memory_range: - min: 16908288 - max: 16908288 + min: 26042368 + max: 26042368 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 150 + layers_on_npu: 147 layers_on_gpu: 0 layers_on_cpu: 24 - total_layers: 174 - job_id: jo5mzkqyp + total_layers: 171 + job_id: jvgd7oleg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.796520Z' + timestamp: '2024-06-08T22:51:37Z' diff --git a/qai_hub_models/models/mobilenet_v3_small/README.md b/qai_hub_models/models/mobilenet_v3_small/README.md index 9d058839..bea9dec8 100644 --- a/qai_hub_models/models/mobilenet_v3_small/README.md +++ b/qai_hub_models/models/mobilenet_v3_small/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of MobileNet-v3-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) diff --git a/qai_hub_models/models/mobilenet_v3_small/evaluate.py b/qai_hub_models/models/mobilenet_v3_small/evaluate.py new file mode 100644 index 00000000..5ebb691f --- /dev/null +++ b/qai_hub_models/models/mobilenet_v3_small/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.mobilenet_v3_small import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/mobilenet_v3_small/export.py b/qai_hub_models/models/mobilenet_v3_small/export.py index 70b1d899..f7fe3203 100644 --- a/qai_hub_models/models/mobilenet_v3_small/export.py +++ b/qai_hub_models/models/mobilenet_v3_small/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/mobilenet_v3_small/info.yaml b/qai_hub_models/models/mobilenet_v3_small/info.yaml index 8984b9c8..67c610bc 100644 --- a/qai_hub_models/models/mobilenet_v3_small/info.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/mobilenet_v3_small/perf.yaml b/qai_hub_models/models/mobilenet_v3_small/perf.yaml index 7bf1b957..10857f0f 100644 --- a/qai_hub_models/models/mobilenet_v3_small/perf.yaml +++ b/qai_hub_models/models/mobilenet_v3_small/perf.yaml @@ -36,11 +36,11 @@ models: - name: MobileNet-v3-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 834.0 - throughput: 
1199.0407673860911 + inference_time: 835.0 + throughput: 1197.6047904191616 estimated_peak_memory_range: - min: 24576 - max: 1878808 + min: 16384 + max: 1873408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jopryd2vg + job_id: jqp4jv0vp job_status: Passed torchscript_onnx_qnn: - inference_time: 866.0 - throughput: 1154.7344110854503 + inference_time: 882.0 + throughput: 1133.7868480725624 estimated_peak_memory_range: min: 16384 - max: 45639048 + max: 13725872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j2p0r922p + job_id: jegnr38r5 job_status: Passed torchscript_onnx_ort: - inference_time: 826.0 - throughput: 1210.6537530266344 + inference_time: 824.0 + throughput: 1213.5922330097087 estimated_peak_memory_range: min: 12288 - max: 75798856 + max: 57762312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1glk82ep + job_id: j2p0el765 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.840059Z' + timestamp: '2024-06-08T22:52:03Z' - torchscript_onnx_tflite: - inference_time: 568.0 - throughput: 1760.5633802816901 + inference_time: 547.0 + throughput: 1828.1535648994516 estimated_peak_memory_range: - min: 16384 - max: 41065712 + min: 12288 + max: 42129856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jep2md9x5 + job_id: j0pxey215 job_status: Passed torchscript_onnx_qnn: inference_time: 583.0 throughput: 1715.2658662092624 estimated_peak_memory_range: - min: 12288 - max: 43536656 + min: 0 + max: 47338784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1p87rmz5 + job_id: jopr1ej9g job_status: Passed torchscript_onnx_ort: - inference_time: 554.0 - throughput: 1805.0541516245487 + inference_time: 586.0 + throughput: 1706.4846416382252 estimated_peak_memory_range: - min: 19972096 - max: 46699072 + min: 524288 + max: 27846320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jw561mzvp + job_id: j1p8wzvxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.840135Z' + timestamp: '2024-06-08T22:52:04Z' - torchscript_onnx_tflite: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 832.0 + throughput: 1201.923076923077 estimated_peak_memory_range: - min: 28672 - max: 1727808 + min: 24576 + max: 2336768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 124 - job_id: jqpyd2jrp + job_id: jo5mv3yw5 job_status: Passed torchscript_onnx_qnn: - inference_time: 868.0 - throughput: 1152.073732718894 + inference_time: 867.0 + throughput: 1153.4025374855826 estimated_peak_memory_range: min: 12288 - max: 24180000 + max: 35394896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 
126 - job_id: jn5q21r75 + job_id: jqpyv607p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.840181Z' + timestamp: '2024-06-08T22:52:02Z' - torchscript_onnx_qnn: - inference_time: 1065.0 - throughput: 938.9671361502348 + inference_time: 1018.0 + throughput: 982.3182711198428 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1249280 + max: 1249280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jogky0qyp + job_id: jep23ln4g job_status: Passed torchscript_onnx_ort: - inference_time: 871.0 - throughput: 1148.105625717566 + inference_time: 879.0 + throughput: 1137.6564277588168 estimated_peak_memory_range: - min: 16216064 - max: 16216064 + min: 16596992 + max: 16596992 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j1p3m71xg + job_id: jogkr3m25 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.840231Z' + timestamp: '2024-06-08T22:52:05Z' diff --git a/qai_hub_models/models/openai_clip/README.md b/qai_hub_models/models/openai_clip/README.md index 06c429e8..0455ec79 100644 --- a/qai_hub_models/models/openai_clip/README.md +++ b/qai_hub_models/models/openai_clip/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenAI-Clip can be found [here](https://github.com/openai/CLIP/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) diff --git a/qai_hub_models/models/openai_clip/export.py b/qai_hub_models/models/openai_clip/export.py index 4cc801ca..08ce7d6a 100644 --- a/qai_hub_models/models/openai_clip/export.py +++ b/qai_hub_models/models/openai_clip/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openai_clip/perf.yaml b/qai_hub_models/models/openai_clip/perf.yaml index da7d3a0d..785f83c0 100644 --- a/qai_hub_models/models/openai_clip/perf.yaml +++ b/qai_hub_models/models/openai_clip/perf.yaml @@ -36,11 +36,11 @@ models: - name: CLIPTextEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 13251.0 - throughput: 75.4660025658441 + inference_time: 13293.0 + throughput: 75.22756337922215 estimated_peak_memory_range: min: 20480 - max: 3401864 + max: 3340864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: j1pvwmr7g + job_id: j1gle3r8p job_status: Passed torchscript_onnx_qnn: - inference_time: 7849.0 - throughput: 127.40476493820869 + inference_time: 7810.0 + throughput: 128.04097311139566 estimated_peak_memory_range: - min: 16384 - max: 25143840 + min: 24576 + max: 31351376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jnp181wkg + job_id: jlpe4k315 job_status: Passed torchscript_onnx_ort: - inference_time: 31734.0 - throughput: 31.511943026407007 + inference_time: 31397.0 + throughput: 31.850176768481067 estimated_peak_memory_range: - min: 65536 - max: 333763384 + min: 57344 + max: 324810128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jep2mdlx5 + job_id: j0pxe1o15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.874657Z' + timestamp: '2024-06-08T22:53:04Z' - torchscript_onnx_tflite: - inference_time: 9373.0 - throughput: 106.68942707777659 + inference_time: 9408.0 + throughput: 106.29251700680273 estimated_peak_memory_range: - min: 0 - max: 209589136 + min: 36864 + max: 211531120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: jlpevxw75 + job_id: j1p3qe2l5 job_status: Passed torchscript_onnx_qnn: - inference_time: 5487.0 - throughput: 182.2489520685256 + inference_time: 5496.0 + throughput: 181.9505094614265 estimated_peak_memory_range: min: 12288 - max: 139438448 + max: 143518544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - 
job_id: jz57dnlq5 + job_id: jz5wm966g job_status: Passed torchscript_onnx_ort: - inference_time: 22304.0 - throughput: 44.83500717360115 + inference_time: 22333.0 + throughput: 44.776787713249455 estimated_peak_memory_range: - min: 40960 - max: 187710464 + min: 36864 + max: 188583968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: j2p0r9l2p + job_id: jegnrevr5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.874827Z' + timestamp: '2024-06-08T22:53:06Z' - torchscript_onnx_tflite: - inference_time: 13297.0 - throughput: 75.2049334436339 + inference_time: 13221.0 + throughput: 75.6372437788367 estimated_peak_memory_range: - min: 65536 - max: 3472688 + min: 40960 + max: 2903592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 2 total_layers: 576 - job_id: jz5w9z3zp + job_id: j1pvzvxjg job_status: Passed torchscript_onnx_qnn: - inference_time: 7821.0 - throughput: 127.86088735455824 + inference_time: 7775.0 + throughput: 128.61736334405145 estimated_peak_memory_range: - min: 20480 - max: 26256120 + min: 16384 + max: 18711280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: jegneq3vg + job_id: jz57vdrl5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.874948Z' + timestamp: '2024-06-08T22:53:02Z' - torchscript_onnx_qnn: - inference_time: 8459.0 - throughput: 118.21728336682823 + inference_time: 8431.0 + throughput: 118.60989206499822 estimated_peak_memory_range: - min: 147456 - max: 147456 + min: 159744 + max: 159744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 377 - job_id: j0px1r6jg + job_id: jnp1q8z2g job_status: Passed torchscript_onnx_ort: - inference_time: 32955.0 - throughput: 30.344409042633895 + inference_time: 32547.0 + throughput: 30.724797984453254 estimated_peak_memory_range: - min: 332324864 - max: 332324864 + min: 40755200 + max: 40755200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 389 - job_id: jogky03yp + job_id: jep23my4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.875049Z' + timestamp: '2024-06-08T22:53:08Z' - name: CLIPImageEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 126637.0 - throughput: 7.896586305740029 + inference_time: 126539.0 + throughput: 7.902701933791163 estimated_peak_memory_range: - min: 135168 - max: 3687832 + min: 0 + max: 273708336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: j7gjly27p + job_id: jw56qnl0g job_status: Passed torchscript_onnx_qnn: - inference_time: 50638.0 - throughput: 19.748015324459892 + inference_time: 50274.0 + throughput: 19.890997334606357 estimated_peak_memory_range: - min: 45056 - max: 64082216 + min: 126976 + max: 66170792 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -247,22 +247,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jvgdv4qkg - job_status: Passed - torchscript_onnx_ort: - inference_time: 171916.0 - throughput: 5.8167942483538475 - estimated_peak_memory_range: - min: 16384 - max: 538222224 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 382 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 382 - job_id: jqpyd26rp + job_id: jygzvrkkp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +256,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.875215Z' + timestamp: '2024-06-08T22:52:57Z' - torchscript_onnx_tflite: - inference_time: 96011.0 - throughput: 10.41547322702607 + inference_time: 96320.0 + throughput: 10.382059800664452 estimated_peak_memory_range: - min: 192512 - max: 749633296 + min: 188416 + max: 752672896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +270,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jygz7yjzp + job_id: jwgoe3qxp job_status: Passed torchscript_onnx_qnn: - inference_time: 37501.0 - throughput: 26.665955574518012 + inference_time: 37784.0 + throughput: 26.46622909167902 estimated_peak_memory_range: - min: 655360 - max: 195743056 + min: 634880 + max: 197848448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: jqp4w4dqg + job_id: jmg994nlg job_status: Passed torchscript_onnx_ort: - inference_time: 131686.0 - throughput: 7.593821666691979 + inference_time: 129578.0 + throughput: 7.717359428298013 estimated_peak_memory_range: - min: 618496 - max: 1275686304 + min: 659456 + max: 1273480192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +300,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: j1p87rzz5 + job_id: jopr1y39g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +309,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.875378Z' + timestamp: '2024-06-08T22:53:07Z' - torchscript_onnx_tflite: - inference_time: 126479.0 - throughput: 7.906450873267499 + inference_time: 125864.0 + throughput: 7.945083582279286 estimated_peak_memory_range: - min: 0 - max: 274583072 + min: 143360 + max: 4010376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +323,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 576 - job_id: jmg942yq5 + job_id: j7gjke4x5 job_status: Passed torchscript_onnx_qnn: - inference_time: 50667.0 - throughput: 19.736712258471982 + inference_time: 50577.0 + throughput: 19.771833046641753 estimated_peak_memory_range: - min: 65536 - max: 59760816 + min: 77824 + max: 66028648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +338,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 371 - job_id: joprydevg + job_id: jqp4jwrvp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,10 +347,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.875496Z' + timestamp: '2024-06-08T22:53:03Z' - torchscript_onnx_qnn: - inference_time: 48879.0 - throughput: 20.458683688291494 + inference_time: 48611.0 + throughput: 20.57147559194421 estimated_peak_memory_range: min: 602112 max: 602112 @@ -376,14 +361,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 369 
- job_id: jo5mzk6yp + job_id: jvgd7v1eg job_status: Passed torchscript_onnx_ort: - inference_time: 168534.0 - throughput: 5.933520832591643 + inference_time: 168455.0 + throughput: 5.936303463833071 estimated_peak_memory_range: - min: 492019712 - max: 492019712 + min: 468086784 + max: 468086784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +376,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 382 - job_id: jn5q21375 + job_id: jqpyvd37p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +385,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.875594Z' + timestamp: '2024-06-08T22:53:08Z' diff --git a/qai_hub_models/models/openpose/README.md b/qai_hub_models/models/openpose/README.md index f5b7c4f4..ad423618 100644 --- a/qai_hub_models/models/openpose/README.md +++ b/qai_hub_models/models/openpose/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of OpenPose can be found [here](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [OpenPose: Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields](https://arxiv.org/abs/1812.08008) diff --git a/qai_hub_models/models/openpose/export.py b/qai_hub_models/models/openpose/export.py index c37d9c3d..18061fc5 100644 --- a/qai_hub_models/models/openpose/export.py +++ b/qai_hub_models/models/openpose/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/openpose/perf.yaml b/qai_hub_models/models/openpose/perf.yaml index acbfb747..30e2f47e 100644 --- a/qai_hub_models/models/openpose/perf.yaml +++ b/qai_hub_models/models/openpose/perf.yaml @@ -36,11 +36,11 @@ models: - name: OpenPose performance_metrics: - torchscript_onnx_tflite: - inference_time: 11695.0 - throughput: 85.50662676357418 + inference_time: 12008.0 + throughput: 83.27781479013991 estimated_peak_memory_range: - min: 200704 - max: 2684240 + min: 217088 + max: 2747920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1p3m7exg + job_id: jogkryx25 job_status: Passed torchscript_onnx_qnn: - inference_time: 11773.0 - throughput: 84.94011721736176 + inference_time: 11771.0 + throughput: 84.95454931611587 estimated_peak_memory_range: - min: 634880 - max: 241091432 + min: 45056 + max: 240267896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j7gjlye7p + job_id: jw56q140g job_status: Passed torchscript_onnx_ort: - inference_time: 12281.0 - throughput: 81.4265939255761 + inference_time: 11936.0 + throughput: 83.78016085790885 estimated_peak_memory_range: - min: 2134016 - max: 399666784 + min: 0 + max: 
374382256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jmg942wq5 + job_id: j7gjklnx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.938307Z' + timestamp: '2024-06-08T22:55:03Z' - torchscript_onnx_tflite: - inference_time: 8716.0 - throughput: 114.73152822395595 + inference_time: 8742.0 + throughput: 114.39029970258522 estimated_peak_memory_range: - min: 212992 - max: 35478464 + min: 12288 + max: 33837760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: jwgovw345 + job_id: jn5q92q4p job_status: Passed torchscript_onnx_qnn: - inference_time: 8753.0 - throughput: 114.24654404204273 + inference_time: 8755.0 + throughput: 114.22044545973729 estimated_peak_memory_range: min: 618496 - max: 54462240 + max: 53012064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jlpevxk75 + job_id: j1p3qm0l5 job_status: Passed torchscript_onnx_ort: - inference_time: 9089.0 - throughput: 110.02310485201892 + inference_time: 9006.0 + throughput: 111.0370863868532 estimated_peak_memory_range: - min: 634880 - max: 30320816 + min: 700416 + max: 31196368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jnp181ekg + job_id: jlpe4vm15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.938390Z' + timestamp: '2024-06-08T22:55:04Z' - torchscript_onnx_tflite: - inference_time: 11834.0 - throughput: 84.50228156160216 + inference_time: 11695.0 + throughput: 85.50662676357418 estimated_peak_memory_range: - min: 229376 - max: 2876192 + min: 196608 + max: 2975008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 103 - job_id: j1pvwmv7g + job_id: j1glekm8p job_status: Passed torchscript_onnx_qnn: - inference_time: 11787.0 - throughput: 84.83922965979468 + inference_time: 11765.0 + throughput: 84.99787505312368 estimated_peak_memory_range: - min: 618496 - max: 240822560 + min: 12288 + max: 229599440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jz5w9zqzp + job_id: j1pvzwkjg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.938443Z' + timestamp: '2024-06-08T22:55:02Z' - torchscript_onnx_qnn: - inference_time: 14114.0 - throughput: 70.85163667280715 + inference_time: 14100.0 + throughput: 70.92198581560284 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jygz7yrzp + job_id: jwgoev6xp job_status: Passed torchscript_onnx_ort: - inference_time: 12351.0 - throughput: 80.96510404015869 + inference_time: 12365.0 + throughput: 80.87343307723413 estimated_peak_memory_range: - min: 93835264 - max: 93835264 + min: 88932352 + max: 88932352 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 189 - job_id: jvgdv4okg + job_id: jygzv7dkp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.938505Z' + timestamp: '2024-06-08T22:55:05Z' diff --git a/qai_hub_models/models/posenet_mobilenet/README.md b/qai_hub_models/models/posenet_mobilenet/README.md index ff11e607..8f4ea678 100644 --- a/qai_hub_models/models/posenet_mobilenet/README.md +++ b/qai_hub_models/models/posenet_mobilenet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Posenet-Mobilenet can be found [here](https://github.com/rwightman/posenet-pytorch/blob/master/LICENSE.txt). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [PersonLab: Person Pose Estimation and Instance Segmentation with a Bottom-Up, Part-Based, Geometric Embedding Model](https://arxiv.org/abs/1803.08225) diff --git a/qai_hub_models/models/posenet_mobilenet/app.py b/qai_hub_models/models/posenet_mobilenet/app.py index 2ccca2f2..72933415 100644 --- a/qai_hub_models/models/posenet_mobilenet/app.py +++ b/qai_hub_models/models/posenet_mobilenet/app.py @@ -582,7 +582,7 @@ def predict_pose_keypoints( keypoint_scores, keypoint_coords, min_pose_score=0.25, - min_part_score=0.25, + min_part_score=0.1, ) image_result = Image.fromarray(output_arr) return pil_undo_resize_pad(image_result, original_size, scale, padding) diff --git a/qai_hub_models/models/posenet_mobilenet/export.py b/qai_hub_models/models/posenet_mobilenet/export.py index 9fa56461..e178937d 100644 --- a/qai_hub_models/models/posenet_mobilenet/export.py +++ b/qai_hub_models/models/posenet_mobilenet/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/posenet_mobilenet/info.yaml b/qai_hub_models/models/posenet_mobilenet/info.yaml index beaeaabb..eac8b0bd 100644 --- a/qai_hub_models/models/posenet_mobilenet/info.yaml +++ b/qai_hub_models/models/posenet_mobilenet/info.yaml @@ -16,7 +16,7 @@ deploy_license: source_repo: https://github.com/rwightman/posenet-pytorch technical_details: Model checkpoint: mobilenet_v1_101 - Input resolution: 257x193 + Input resolution: 513x257 Number of parameters: 3.31M Model size: 12.7 MB applicable_scenarios: @@ -33,7 +33,7 @@ related_models: - hrnet_pose has_static_banner: yes has_animated_banner: yes -license_type: other +license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - coco diff --git a/qai_hub_models/models/posenet_mobilenet/perf.yaml b/qai_hub_models/models/posenet_mobilenet/perf.yaml index e55112e4..e405cdc0 100644 --- a/qai_hub_models/models/posenet_mobilenet/perf.yaml +++ b/qai_hub_models/models/posenet_mobilenet/perf.yaml @@ -39,8 +39,8 @@ models: inference_time: 1387.0 throughput: 
720.9805335255949 estimated_peak_memory_range: - min: 16384 - max: 1622952 + min: 12288 + max: 1654968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp181elg + job_id: jmg994llg job_status: Passed torchscript_onnx_qnn: - inference_time: 1436.0 - throughput: 696.3788300835655 + inference_time: 1439.0 + throughput: 694.9270326615705 estimated_peak_memory_range: - min: 12288 - max: 67879624 + min: 20480 + max: 24010176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jqp4w4wlg + job_id: jz57vdyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 2081.0 - throughput: 480.5382027871216 + inference_time: 2086.0 + throughput: 479.3863854266539 estimated_peak_memory_range: min: 12288 - max: 19904304 + max: 25676680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: joprydyeg + job_id: jegnre6r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:41.972787Z' + timestamp: '2024-06-08T22:55:33Z' - torchscript_onnx_tflite: - inference_time: 973.0 - throughput: 1027.749229188078 + inference_time: 977.0 + throughput: 1023.5414534288639 estimated_peak_memory_range: min: 12288 - max: 35518112 + max: 36616768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgdv4olg + job_id: jnp1q842g job_status: Passed torchscript_onnx_qnn: inference_time: 1010.0 throughput: 990.0990099009902 estimated_peak_memory_range: min: 1597440 - max: 33875456 + max: 36578000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: j0px1r19g + job_id: jqp4jwlvp job_status: Passed torchscript_onnx_ort: - inference_time: 1543.0 - throughput: 648.0881399870383 + inference_time: 1404.0 + throughput: 712.2507122507122 estimated_peak_memory_range: - min: 937984 - max: 21691776 + min: 1597440 + max: 24142448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jep2mdmm5 + job_id: jopr1yv9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:41.972843Z' + timestamp: '2024-06-08T22:55:34Z' - torchscript_onnx_tflite: - inference_time: 1391.0 - throughput: 718.9072609633357 + inference_time: 1388.0 + throughput: 720.4610951008646 estimated_peak_memory_range: - min: 16384 - max: 3554688 + min: 12288 + max: 1476976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jz57dndr5 + job_id: jvgd7vxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 1437.0 - throughput: 695.8942240779402 + inference_time: 1447.0 + throughput: 691.0850034554251 estimated_peak_memory_range: - min: 28672 - max: 13380040 + min: 16384 + max: 13954296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jegneqemg + job_id: jo5mvznw5 job_status: 
Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:41.972876Z' + timestamp: '2024-06-08T22:55:32Z' - torchscript_onnx_qnn: - inference_time: 1748.0 - throughput: 572.0823798627002 + inference_time: 1751.0 + throughput: 571.1022272986864 estimated_peak_memory_range: min: 1589248 max: 1589248 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 69 - job_id: jo5mzkzqp + job_id: j0pxe1k15 job_status: Passed torchscript_onnx_ort: - inference_time: 2133.0 - throughput: 468.8232536333802 + inference_time: 2129.0 + throughput: 469.7040864255519 estimated_peak_memory_range: - min: 159744 - max: 159744 + min: 151552 + max: 151552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jqpyd2d4p + job_id: jep23mk4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:41.972913Z' + timestamp: '2024-06-08T22:55:35Z' diff --git a/qai_hub_models/models/protocols.py b/qai_hub_models/models/protocols.py index 1d79a391..ac1352af 100644 --- a/qai_hub_models/models/protocols.py +++ b/qai_hub_models/models/protocols.py @@ -33,10 +33,10 @@ from qai_hub_models.utils.input_spec import InputSpec FromPretrainedTypeVar = TypeVar("FromPretrainedTypeVar", bound="FromPretrainedProtocol") - FromPrecompiledTypeVar = TypeVar( "FromPrecompiledTypeVar", bound="FromPrecompiledProtocol" ) +HubModelProtocolTypeVar = TypeVar("HubModelProtocolTypeVar", bound="HubModelProtocol") class HubModelProtocol(Protocol): diff --git a/qai_hub_models/models/quicksrnetlarge/README.md b/qai_hub_models/models/quicksrnetlarge/README.md index 12c61b60..528f3c94 100644 --- a/qai_hub_models/models/quicksrnetlarge/README.md +++ b/qai_hub_models/models/quicksrnetlarge/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetLarge can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetlarge/demo.py b/qai_hub_models/models/quicksrnetlarge/demo.py index 12d688c3..70114874 100644 --- a/qai_hub_models/models/quicksrnetlarge/demo.py +++ b/qai_hub_models/models/quicksrnetlarge/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetlarge.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetLarge, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_demo.jpg" -) +from qai_hub_models.models.quicksrnetlarge.model import MODEL_ID, QuickSRNetLarge # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetLarge, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetlarge/export.py b/qai_hub_models/models/quicksrnetlarge/export.py index d21f0eab..5f3ec808 100644 --- a/qai_hub_models/models/quicksrnetlarge/export.py +++ b/qai_hub_models/models/quicksrnetlarge/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetlarge/info.yaml b/qai_hub_models/models/quicksrnetlarge/info.yaml index b9cc532d..b139e358 100644 --- a/qai_hub_models/models/quicksrnetlarge/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_large_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 436K - Model size: 1.67 MB + Model checkpoint: quicksrnet_large_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 424K + Model size: 1.63 MB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetlarge/model.py b/qai_hub_models/models/quicksrnetlarge/model.py index bac993cc..6a83e660 100644 --- a/qai_hub_models/models/quicksrnetlarge/model.py +++ 
b/qai_hub_models/models/quicksrnetlarge/model.py @@ -4,85 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_large_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_large_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_large_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_large_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 64 NUM_INTERMEDIATE_LAYERS = 11 -USE_ITO_CONNECTION = True - -class QuickSRNetLarge(BaseModel): - """Exportable QuickSRNet-Large upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = quicksrnet_model +class QuickSRNetLarge(SuperResolutionModel): + """Exportable QuickSRNetLarge super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetLarge: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetLarge: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=True, + ) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Large on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. 
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge/perf.yaml b/qai_hub_models/models/quicksrnetlarge/perf.yaml index cb348dbc..4ca0e8f0 100644 --- a/qai_hub_models/models/quicksrnetlarge/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetLarge performance_metrics: - torchscript_onnx_tflite: - inference_time: 2401.0 - throughput: 416.49312786339027 + inference_time: 2412.0 + throughput: 414.5936981757877 estimated_peak_memory_range: min: 28672 - max: 17275808 + max: 1429016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jogky0yop + job_id: j1gle1ojp job_status: Passed torchscript_onnx_qnn: - inference_time: 2092.0 - throughput: 478.0114722753346 + inference_time: 2108.0 + throughput: 474.3833017077799 estimated_peak_memory_range: - min: 212992 - max: 12407432 + min: 229376 + max: 5466776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jw561m17p + job_id: jwgoe4oqp job_status: Passed torchscript_onnx_ort: - inference_time: 2607.0 - throughput: 383.5826620636747 + inference_time: 2712.0 + throughput: 368.7315634218289 estimated_peak_memory_range: - min: 32768 - max: 85476552 + min: 16384 + max: 20834136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: j7gjlyl8p + job_id: jygzv44op job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.007134Z' + timestamp: '2024-06-11T11:56:08Z' - torchscript_onnx_tflite: - inference_time: 1797.0 - throughput: 556.4830272676684 + inference_time: 1740.0 + throughput: 574.7126436781609 estimated_peak_memory_range: min: 16384 - max: 28228496 + max: 29572928 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jn5q212m5 + job_id: jw56qdr6g job_status: Passed torchscript_onnx_qnn: - inference_time: 1501.0 - throughput: 666.2225183211193 + inference_time: 1500.0 + throughput: 666.6666666666666 estimated_peak_memory_range: min: 204800 - max: 22080720 + max: 21850576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1p3m7mzg + job_id: j1pvz99kg job_status: Passed torchscript_onnx_ort: - inference_time: 1873.0 - throughput: 533.9028296849973 + inference_time: 1855.0 + throughput: 539.0835579514825 estimated_peak_memory_range: - min: 217088 - max: 19951008 + min: 212992 + max: 19290704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jlpevxv05 + job_id: jz5wm113g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.007179Z' + timestamp: '2024-06-11T11:56:09Z' - torchscript_onnx_tflite: - inference_time: 2450.0 - throughput: 408.16326530612247 + inference_time: 2478.0 + throughput: 403.5512510088781 estimated_peak_memory_range: min: 24576 - max: 17617072 + max: 1690672 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j1glk8klp + job_id: j1p3qwx35 job_status: Passed torchscript_onnx_qnn: - inference_time: 2090.0 - throughput: 478.4688995215311 + inference_time: 2101.0 + throughput: 475.9638267491671 estimated_peak_memory_range: - min: 12288 - max: 35001256 + min: 221184 + max: 5373456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j1pvwmwmg + job_id: jlpe4llo5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.007209Z' + timestamp: '2024-06-11T11:56:07Z' - torchscript_onnx_qnn: - inference_time: 2946.0 - throughput: 339.44331296673454 + inference_time: 2949.0 + throughput: 339.097999321804 estimated_peak_memory_range: - min: 253952 - max: 253952 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jwgovwvd5 + job_id: j7gjkwwv5 job_status: Passed torchscript_onnx_ort: - inference_time: 2713.0 - throughput: 368.59565057132323 + inference_time: 2692.0 + throughput: 371.4710252600297 estimated_peak_memory_range: - min: 12627968 - max: 12627968 + min: 13115392 + max: 13115392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jygz7y76p + job_id: jmg99xxwg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.007239Z' + timestamp: '2024-06-11T11:56:10Z' diff --git a/qai_hub_models/models/quicksrnetlarge/test.py b/qai_hub_models/models/quicksrnetlarge/test.py index ad63526d..6f148c3c 100644 --- a/qai_hub_models/models/quicksrnetlarge/test.py +++ b/qai_hub_models/models/quicksrnetlarge/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetlarge.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetlarge.demo import main as demo_main from qai_hub_models.models.quicksrnetlarge.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/README.md b/qai_hub_models/models/quicksrnetlarge_quantized/README.md index 71ddd422..025a873c 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/README.md +++ b/qai_hub_models/models/quicksrnetlarge_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetLarge-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/demo.py b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py index 53d37094..1e090bfd 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetlarge_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetLargeQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetLargeQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/export.py b/qai_hub_models/models/quicksrnetlarge_quantized/export.py index fcea80fa..4b832a72 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/export.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml index 93c2001d..897f7e5f 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_large_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 436K - Model size: 464 KB + Model checkpoint: quicksrnet_large_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 424K + Model size: 449 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/model.py b/qai_hub_models/models/quicksrnetlarge_quantized/model.py index b1541f6d..4767a779 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/model.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/model.py @@ 
-18,6 +18,7 @@ from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetlarge.model import QuickSRNetLarge from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +27,6 @@ MODEL_ASSET_VERSION = 3 DEFAULT_ENCODINGS = "quicksrnetlarge_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): @@ -36,16 +36,16 @@ class QuickSRNetLargeQuantizable(AIMETQuantizableMixin, QuickSRNetLarge): Support for quantizing using your own weights & data will come at a later date.""" def __init__( - self, - quicksrnet_model: QuantizationSimModel, + self, quicksrnet_model: QuantizationSimModel, scale_factor: int ) -> None: - QuickSRNetLarge.__init__(self, quicksrnet_model.model) + QuickSRNetLarge.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetLargeQuantizable": """ Parameters: @@ -55,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetLarge.from_pretrained() + fp16_model = QuickSRNetLarge.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -78,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml index c42927e0..6fc4336f 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetlarge_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetLarge-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1335.0 - throughput: 749.0636704119851 + inference_time: 1324.0 + throughput: 755.2870090634441 estimated_peak_memory_range: - min: 20480 - max: 1445696 + min: 12288 + max: 2457016 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jvgdv4vlg + job_id: jvgd7zzrg job_status: Passed torchscript_onnx_qnn: - inference_time: 1153.0 - throughput: 867.3026886383348 + inference_time: 1159.0 + throughput: 862.8127696289905 estimated_peak_memory_range: - min: 16384 - max: 8430176 + min: 77824 + max: 3860912 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jegneq9mg + job_id: jz57v7795 job_status: Passed torchscript_onnx_ort: - inference_time: 1490.0 - throughput: 671.1409395973154 + inference_time: 1039.0 + throughput: 962.4639076034649 estimated_peak_memory_range: - min: 12288 - max: 72563040 + min: 69632 + max: 4717016 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: j1p87r385 + total_layers: 22 + job_id: jegnr77q5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung 
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.041532Z' + timestamp: '2024-06-11T11:56:37Z' - torchscript_onnx_tflite: - inference_time: 1013.0 - throughput: 987.1668311944719 + inference_time: 1024.0 + throughput: 976.5625 estimated_peak_memory_range: - min: 12288 - max: 24747808 + min: 49152 + max: 25834320 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jz57dnjr5 + job_id: jz5wm11mg job_status: Passed torchscript_onnx_qnn: - inference_time: 804.0 - throughput: 1243.7810945273632 + inference_time: 812.0 + throughput: 1231.527093596059 estimated_peak_memory_range: - min: 12288 - max: 19651520 + min: 7340032 + max: 27038272 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jopryd4eg + job_id: jqp4j991p job_status: Passed torchscript_onnx_ort: - inference_time: 1049.0 - throughput: 953.2888465204957 + inference_time: 776.0 + throughput: 1288.659793814433 estimated_peak_memory_range: - min: 0 - max: 15719840 + min: 36864 + max: 17135056 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: jogky0lop + total_layers: 22 + job_id: jopr1nn7g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.041576Z' + timestamp: '2024-06-11T11:56:37Z' - torchscript_onnx_tflite: - inference_time: 1409.0 - throughput: 709.7232079488999 + inference_time: 1364.0 + throughput: 733.1378299120234 estimated_peak_memory_range: - min: 28672 - max: 1459792 + min: 16384 + max: 1375064 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jqp4w4xlg + job_id: jmg99xx8g job_status: Passed torchscript_onnx_qnn: - inference_time: 1161.0 - throughput: 861.3264427217915 + inference_time: 1156.0 + throughput: 865.0519031141869 estimated_peak_memory_range: - min: 28672 - max: 8085960 + min: 94208 + max: 9070680 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jqpyd244p + job_id: jo5mvdd95 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.041604Z' + timestamp: '2024-06-11T11:56:36Z' - torchscript_onnx_tflite: - inference_time: 3568.0 - throughput: 280.2690582959641 + inference_time: 3979.0 + throughput: 251.31942699170645 estimated_peak_memory_range: min: 12288 - max: 17966464 + max: 18592624 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: j0px1r79g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3190.0 - throughput: 313.47962382445144 - estimated_peak_memory_range: - min: 61440 - max: 18936608 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 19 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 19 - job_id: j2p0r91ep + job_id: jnp1qvv7g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.041632Z' 
+ timestamp: '2024-06-11T11:56:31Z' - torchscript_onnx_tflite: - inference_time: 34339.0 - throughput: 29.121407146393313 + inference_time: 32895.0 + throughput: 30.399756801945585 estimated_peak_memory_range: - min: 3600384 - max: 5661144 + min: 4079616 + max: 6087016 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 31 - job_id: jo5mzkwqp + job_id: jvgd7zzzg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.041649Z' + timestamp: '2024-06-11T11:56:32Z' - torchscript_onnx_qnn: - inference_time: 1090.0 - throughput: 917.4311926605504 + inference_time: 1008.0 + throughput: 992.063492063492 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 90112 + max: 90112 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jep2md7m5 + job_id: j0pxeddl5 job_status: Passed torchscript_onnx_ort: - inference_time: 1440.0 - throughput: 694.4444444444445 + inference_time: 1090.0 + throughput: 917.4311926605504 estimated_peak_memory_range: - min: 8978432 - max: 8978432 + min: 4714496 + max: 4714496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 24 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 24 - job_id: jn5q217m5 + total_layers: 22 + job_id: jep23vvqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.041677Z' + timestamp: '2024-06-11T11:56:38Z' diff --git a/qai_hub_models/models/quicksrnetlarge_quantized/test.py b/qai_hub_models/models/quicksrnetlarge_quantized/test.py index 16e59332..81430c06 100644 --- a/qai_hub_models/models/quicksrnetlarge_quantized/test.py +++ b/qai_hub_models/models/quicksrnetlarge_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetlarge_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetlarge.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetlarge_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetlarge_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetLargeQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetlarge_demo_output.png" ) @@ -67,26 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetLargeQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - # No test of torchscipt and aimet encodings due to #8954 - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetmedium/README.md b/qai_hub_models/models/quicksrnetmedium/README.md index cb5b80f1..0e95ef93 100644 --- a/qai_hub_models/models/quicksrnetmedium/README.md +++ b/qai_hub_models/models/quicksrnetmedium/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetMedium can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetmedium/demo.py b/qai_hub_models/models/quicksrnetmedium/demo.py index 51c1ffec..2d75fb92 100644 --- a/qai_hub_models/models/quicksrnetmedium/demo.py +++ b/qai_hub_models/models/quicksrnetmedium/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetmedium.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetMedium, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo.jpg" -) +from qai_hub_models.models.quicksrnetmedium.model import MODEL_ID, QuickSRNetMedium # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetMedium, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetmedium/export.py b/qai_hub_models/models/quicksrnetmedium/export.py index 32a7b7ba..20dca067 100644 --- a/qai_hub_models/models/quicksrnetmedium/export.py +++ b/qai_hub_models/models/quicksrnetmedium/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetmedium/info.yaml b/qai_hub_models/models/quicksrnetmedium/info.yaml index 72ae05be..9f0a95c6 100644 --- a/qai_hub_models/models/quicksrnetmedium/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_medium_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 61.0K - Model size: 244 KB + Model checkpoint: quicksrnet_medium_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 55.0K + Model size: 220 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetmedium/model.py b/qai_hub_models/models/quicksrnetmedium/model.py index abb5817a..65c91c46 100644 --- a/qai_hub_models/models/quicksrnetmedium/model.py +++ 
b/qai_hub_models/models/quicksrnetmedium/model.py @@ -4,86 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_medium_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_medium_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_medium_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_medium_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 32 NUM_INTERMEDIATE_LAYERS = 5 -USE_ITO_CONNECTION = False - -class QuickSRNetMedium(BaseModel): - """Exportable QuickSRNet-Medium upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.relu = torch.nn.ReLU() - self.model = quicksrnet_model +class QuickSRNetMedium(SuperResolutionModel): + """Exportable QuickSRNetMedium super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetMedium: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetMedium: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=False, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, + ) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Medium on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - # image = self.relu(image) - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. 
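Note on the model.py rewrite above: the per-variant `BaseModel` boilerplate is replaced by a shared `SuperResolutionModel` base plus a templated checkpoint URL, so the scale factor becomes a constructor argument instead of a hard-coded constant. The shared module itself is not part of this diff, so the following is only a rough sketch of what the refactor relies on; the names `DEFAULT_SCALE_FACTOR`, `validate_scale_factor`, and the `(model, scale_factor)` constructor are taken from how they are used above, while the default value, the supported-factor set, and the class body are assumptions.

```python
# Illustrative sketch only; the real definitions live in
# qai_hub_models/models/_shared/super_resolution/model.py, which this diff does not show.
import torch

DEFAULT_SCALE_FACTOR = 3  # assumed: info.yaml in this change lists the 3x checkpoints
SUPPORTED_SCALE_FACTORS = (2, 3, 4)  # assumed set of published QuickSRNet checkpoints


def validate_scale_factor(scale_factor: int) -> None:
    """Fail fast when no published checkpoint exists for the requested factor (assumed behavior)."""
    if scale_factor not in SUPPORTED_SCALE_FACTORS:
        raise ValueError(
            f"Unsupported scale factor {scale_factor}; expected one of {SUPPORTED_SCALE_FACTORS}."
        )


class SuperResolutionModel(torch.nn.Module):
    """Minimal stand-in for the shared base class the QuickSRNet variants now extend."""

    def __init__(self, sr_model: torch.nn.Module, scale_factor: int) -> None:
        super().__init__()
        self.model = sr_model
        self.scale_factor = scale_factor

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        # image: float32 RGB in [0, 1], shape (N, 3, H, W); the output spatial size
        # is the input size multiplied by self.scale_factor.
        return self.model(image)
```

With the 3x checkpoint and the 640x360 input resolution now listed in info.yaml, the upscaled output is 1920x1080.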
- # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. - return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium/perf.yaml b/qai_hub_models/models/quicksrnetmedium/perf.yaml index 111b4ab6..a70a9366 100644 --- a/qai_hub_models/models/quicksrnetmedium/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetMedium performance_metrics: - torchscript_onnx_tflite: - inference_time: 1387.0 - throughput: 720.9805335255949 + inference_time: 1343.0 + throughput: 744.6016381236038 estimated_peak_memory_range: - min: 24576 - max: 1439976 + min: 16384 + max: 1439320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jw561m37p + job_id: j2p0evvn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 983.0 - throughput: 1017.293997965412 + inference_time: 988.0 + throughput: 1012.1457489878543 estimated_peak_memory_range: - min: 16384 - max: 9759576 + min: 12288 + max: 2409584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j1pvwm1mg + job_id: jw56qdwyg job_status: Passed torchscript_onnx_ort: - inference_time: 1583.0 - throughput: 631.7119393556538 + inference_time: 1506.0 + throughput: 664.0106241699867 estimated_peak_memory_range: - min: 212992 - max: 56649320 + min: 217088 + max: 3451560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jz5w9zdjp + job_id: jygzv4nxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.084964Z' + timestamp: '2024-06-11T11:57:01Z' - torchscript_onnx_tflite: - inference_time: 979.0 - throughput: 1021.4504596527069 + inference_time: 898.0 + throughput: 1113.5857461024498 estimated_peak_memory_range: - min: 16384 - max: 20039200 + min: 20480 + max: 20940320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j1p3m74zg + job_id: j1p8w44op job_status: Passed torchscript_onnx_qnn: - inference_time: 653.0 - throughput: 1531.3935681470139 + inference_time: 645.0 + throughput: 1550.3875968992247 estimated_peak_memory_range: - min: 233472 - max: 17578544 + min: 208896 + max: 17163888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j7gjly08p + job_id: jwgoe48kp job_status: Passed torchscript_onnx_ort: - inference_time: 1037.0 - throughput: 964.3201542912246 + inference_time: 1070.0 + throughput: 934.5794392523364 estimated_peak_memory_range: - min: 0 - max: 13337296 + min: 212992 + max: 13764384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jmg9423v5 + job_id: jz5wm14mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.085007Z' + timestamp: '2024-06-11T11:57:02Z' - torchscript_onnx_tflite: - 
inference_time: 1419.0 - throughput: 704.7216349541931 + inference_time: 1369.0 + throughput: 730.4601899196493 estimated_peak_memory_range: - min: 32768 - max: 8332616 + min: 24576 + max: 1342320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jwgovw1d5 + job_id: jn5q9mmop job_status: Passed torchscript_onnx_qnn: - inference_time: 991.0 - throughput: 1009.0817356205853 + inference_time: 1010.0 + throughput: 990.0990099009902 estimated_peak_memory_range: - min: 212992 - max: 65514992 + min: 221184 + max: 7892152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jygz7yx6p + job_id: jlpe4lyv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.085033Z' + timestamp: '2024-06-11T11:57:00Z' - torchscript_onnx_qnn: - inference_time: 1228.0 - throughput: 814.3322475570033 + inference_time: 1066.0 + throughput: 938.0863039399625 estimated_peak_memory_range: - min: 237568 - max: 237568 + min: 204800 + max: 204800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jlpevxr05 + job_id: j7gjkwqe5 job_status: Passed torchscript_onnx_ort: - inference_time: 1529.0 - throughput: 654.0222367560497 + inference_time: 1498.0 + throughput: 667.5567423230974 estimated_peak_memory_range: - min: 8851456 - max: 8851456 + min: 9003008 + max: 9003008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 19 - job_id: jnp181dlg + job_id: jmg99xd8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.085060Z' + timestamp: '2024-06-11T11:57:03Z' diff --git a/qai_hub_models/models/quicksrnetmedium/test.py b/qai_hub_models/models/quicksrnetmedium/test.py index 9cd04d8e..4c3129a0 100644 --- a/qai_hub_models/models/quicksrnetmedium/test.py +++ b/qai_hub_models/models/quicksrnetmedium/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetmedium.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetmedium.demo import main as demo_main from qai_hub_models.models.quicksrnetmedium.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/README.md b/qai_hub_models/models/quicksrnetmedium_quantized/README.md index 83ebe05d..2ffc7d9f 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/README.md +++ b/qai_hub_models/models/quicksrnetmedium_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetMedium-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/demo.py b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py index f45370ab..4d488e7e 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetmedium_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetMediumQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetMediumQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/export.py b/qai_hub_models/models/quicksrnetmedium_quantized/export.py index 18909474..fa37875f 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/export.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml index 070615b3..e17071f4 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_medium_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 61.0K - Model size: 244 KB + Model checkpoint: quicksrnet_medium_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 55.0K + Model size: 67.2 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/model.py b/qai_hub_models/models/quicksrnetmedium_quantized/model.py index 1c17a3dc..e16d87c5 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/model.py +++ 
b/qai_hub_models/models/quicksrnetmedium_quantized/model.py @@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetmedium.model import QuickSRNetMedium from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +26,6 @@ MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "quicksrnetmedium_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): @@ -37,14 +36,16 @@ class QuickSRNetMediumQuantizable(AIMETQuantizableMixin, QuickSRNetMedium): def __init__( self, quicksrnet_model: QuantizationSimModel, + scale_factor: int, ) -> None: - QuickSRNetMedium.__init__(self, quicksrnet_model.model) + QuickSRNetMedium.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetMediumQuantizable": """ Parameters: @@ -54,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetMedium.from_pretrained() + fp16_model = QuickSRNetMedium.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -77,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml index f1d881d8..72df875e 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetmedium_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetMedium-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1042.0 - throughput: 959.6928982725528 + inference_time: 1000.0 + throughput: 1000.0 estimated_peak_memory_range: - min: 24576 - max: 1638272 + min: 12288 + max: 5493824 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jz57dnvr5 + job_id: jvgd7z2zg job_status: Passed torchscript_onnx_qnn: - inference_time: 801.0 - throughput: 1248.4394506866417 + inference_time: 803.0 + throughput: 1245.3300124533 estimated_peak_memory_range: min: 16384 - max: 10253480 + max: 10291792 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jopryd1eg + job_id: jegnr7kq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1171.0 - throughput: 853.9709649871904 + inference_time: 757.0 + throughput: 1321.003963011889 estimated_peak_memory_range: - min: 212992 - max: 24635072 + min: 65536 + max: 19746264 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: jogky0rop + total_layers: 14 + job_id: j2p0ev6n5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android 
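The quantizable wrapper above now threads `scale_factor` from `from_pretrained` through to the float model and the constructor. A minimal usage sketch based only on the signatures visible in this hunk (whether checkpoints and encodings exist for non-default factors is not established by this diff):

```python
# Requires aimet_torch, which the module imports at the top of model.py.
from qai_hub_models.models.quicksrnetmedium_quantized.model import (
    QuickSRNetMediumQuantizable,
)

# Default: prepare the float model, equalize it, wrap it in a QuantizationSimModel,
# and load the pre-computed AIMET encodings shipped with the model.
model = QuickSRNetMediumQuantizable.from_pretrained()

# Per the docstring, aimet_encodings can also point at a local encodings file
# instead of the shipped DEFAULT set.
model_local = QuickSRNetMediumQuantizable.from_pretrained(aimet_encodings="my_encodings.json")

# Request a different upscaling ratio; only valid if a matching float checkpoint
# (and, for good accuracy, matching encodings) is actually available.
model_2x = QuickSRNetMediumQuantizable.from_pretrained(scale_factor=2)
```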
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.119455Z' + timestamp: '2024-06-11T11:57:28Z' - torchscript_onnx_tflite: - inference_time: 812.0 - throughput: 1231.527093596059 + inference_time: 814.0 + throughput: 1228.5012285012285 estimated_peak_memory_range: min: 12288 - max: 19349424 + max: 20707552 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4w4jlg + job_id: jz57v7995 job_status: Passed torchscript_onnx_qnn: - inference_time: 540.0 - throughput: 1851.851851851852 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: min: 65536 - max: 14246880 + max: 14574352 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jep2md3m5 + job_id: jopr1nw7g job_status: Passed torchscript_onnx_ort: - inference_time: 853.0 - throughput: 1172.3329425556858 + inference_time: 558.0 + throughput: 1792.1146953405018 estimated_peak_memory_range: - min: 212992 - max: 14121760 + min: 65536 + max: 12140448 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: jn5q219m5 + total_layers: 14 + job_id: j1p8w41op job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.119496Z' + timestamp: '2024-06-11T11:57:29Z' - torchscript_onnx_tflite: - inference_time: 1898.0 - throughput: 526.8703898840885 + inference_time: 995.0 + throughput: 1005.0251256281407 estimated_peak_memory_range: - min: 32768 - max: 2871560 + min: 24576 + max: 3118760 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0px1re9g + job_id: jqp4j931p job_status: Passed torchscript_onnx_qnn: - inference_time: 819.0 - throughput: 1221.001221001221 + inference_time: 800.0 + throughput: 1250.0 estimated_peak_memory_range: - min: 65536 - max: 68766280 + min: 16384 + max: 18363240 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j2p0r9eep + job_id: jqpyv7mlp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.119523Z' + timestamp: '2024-06-11T11:57:27Z' - torchscript_onnx_tflite: - inference_time: 2862.0 - throughput: 349.4060097833683 + inference_time: 1968.0 + throughput: 508.130081300813 estimated_peak_memory_range: - min: 16384 - max: 14394944 + min: 12288 + max: 14747456 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mzkvqp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1349.0 - throughput: 741.2898443291327 - estimated_peak_memory_range: - min: 61440 - max: 14437104 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 11 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 11 - job_id: j1p87rw85 + job_id: j0pxedxl5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: 
'2024-05-29T18:59:42.119548Z' + timestamp: '2024-06-11T11:57:22Z' - torchscript_onnx_tflite: - inference_time: 8787.0 - throughput: 113.80448389666553 + inference_time: 9155.0 + throughput: 109.22992900054615 estimated_peak_memory_range: - min: 3321856 - max: 10915320 + min: 3342336 + max: 7015776 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jegneqrmg + job_id: jo5mvd895 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.119564Z' + timestamp: '2024-06-11T11:57:23Z' - torchscript_onnx_qnn: - inference_time: 750.0 - throughput: 1333.3333333333333 + inference_time: 764.0 + throughput: 1308.9005235602094 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 1196032 + max: 1196032 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jqpyd2v4p + job_id: jep23veqg job_status: Passed torchscript_onnx_ort: - inference_time: 1208.0 - throughput: 827.8145695364238 + inference_time: 781.0 + throughput: 1280.4097311139565 estimated_peak_memory_range: - min: 8826880 - max: 8826880 + min: 7262208 + max: 7262208 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 16 + layers_on_npu: 14 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 16 - job_id: j1glk8elp + total_layers: 14 + job_id: jogkr98n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.119590Z' + timestamp: '2024-06-11T11:57:30Z' diff --git a/qai_hub_models/models/quicksrnetmedium_quantized/test.py b/qai_hub_models/models/quicksrnetmedium_quantized/test.py index c8c6ea58..6f29e238 100644 --- a/qai_hub_models/models/quicksrnetmedium_quantized/test.py +++ b/qai_hub_models/models/quicksrnetmedium_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
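A note for reading the regenerated perf.yaml blocks in this change: `inference_time` is reported in microseconds and `throughput` in inferences per second, so the two fields are reciprocals scaled by 10^6. For example, the Galaxy S23 QNN entry above records `inference_time: 803.0` and `throughput: 1245.3300124533`:

```python
# Sanity check of the inference_time / throughput relationship in perf.yaml
# (inference_time is in microseconds).
inference_time_us = 803.0
throughput_per_s = 1_000_000 / inference_time_us
print(throughput_per_s)  # ~1245.33, matching the recorded throughput: 1245.3300124533
```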
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetmedium_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetmedium.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetmedium_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetmedium_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetMediumQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetmedium_demo_output.png" ) @@ -67,24 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetMediumQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/quicksrnetsmall/README.md b/qai_hub_models/models/quicksrnetsmall/README.md index 665e005e..ada2e6c6 100644 --- a/qai_hub_models/models/quicksrnetsmall/README.md +++ b/qai_hub_models/models/quicksrnetsmall/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetSmall can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetsmall/demo.py b/qai_hub_models/models/quicksrnetsmall/demo.py index dd0e6c43..5179a61a 100644 --- a/qai_hub_models/models/quicksrnetsmall/demo.py +++ b/qai_hub_models/models/quicksrnetsmall/demo.py @@ -3,16 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.quicksrnetsmall.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - QuickSRNetSmall, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo.jpg" -) +from qai_hub_models.models.quicksrnetsmall.model import MODEL_ID, QuickSRNetSmall # Run QuickSRNet end-to-end on a sample image. @@ -21,7 +12,6 @@ def main(is_test: bool = False): super_resolution_demo( model_cls=QuickSRNetSmall, model_id=MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetsmall/export.py b/qai_hub_models/models/quicksrnetsmall/export.py index 4d78a4de..7bf13e43 100644 --- a/qai_hub_models/models/quicksrnetsmall/export.py +++ b/qai_hub_models/models/quicksrnetsmall/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetsmall/info.yaml b/qai_hub_models/models/quicksrnetsmall/info.yaml index 128750f2..94d58142 100644 --- a/qai_hub_models/models/quicksrnetsmall/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_small_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 76.0M - Model size: 290 MB + Model checkpoint: quicksrnet_small_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 27.2K + Model size: 110 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetsmall/model.py b/qai_hub_models/models/quicksrnetsmall/model.py index f1eb380f..9b21851e 100644 --- a/qai_hub_models/models/quicksrnetsmall/model.py +++ 
b/qai_hub_models/models/quicksrnetsmall/model.py @@ -4,85 +4,48 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator from qai_hub_models.models._shared.quicksrnet.common import ( _load_quicksrnet_source_model, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/quicksrnet/model/model_cards/quicksrnet_small_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_small_4x_checkpoint_float32.pth.tar -QUICKSRNET_WEIGHTS = "quicksrnet_small_4x_checkpoint_float32.pth.tar" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/quicksrnet_small_{scale_factor}x_checkpoint_float32.pth.tar" NUM_CHANNELS = 32 NUM_INTERMEDIATE_LAYERS = 2 -USE_ITO_CONNECTION = False - -class QuickSRNetSmall(BaseModel): - """Exportable QuickSRNet-Small upscaler, end-to-end.""" - def __init__( - self, - quicksrnet_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = quicksrnet_model +class QuickSRNetSmall(SuperResolutionModel): + """Exportable QuickSRNetSmall super resolution model, end-to-end.""" @classmethod - def from_pretrained(cls) -> QuickSRNetSmall: + def from_pretrained( + cls, scale_factor: int = DEFAULT_SCALE_FACTOR + ) -> QuickSRNetSmall: + validate_scale_factor(scale_factor) model = _load_quicksrnet_source_model( - SCALING_FACTOR, + scale_factor, NUM_CHANNELS, NUM_INTERMEDIATE_LAYERS, - USE_ITO_CONNECTION, + use_ito_connection=False, + ) + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, ) - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUICKSRNET_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) + checkpoint = load_torch(checkpoint_asset) model.load_state_dict(checkpoint["state_dict"]) model.eval() - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run QuickSRNet-Small on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. 
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall/perf.yaml b/qai_hub_models/models/quicksrnetsmall/perf.yaml index 39e979d9..16c8950f 100644 --- a/qai_hub_models/models/quicksrnetsmall/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall/perf.yaml @@ -36,11 +36,11 @@ models: - name: QuickSRNetSmall performance_metrics: - torchscript_onnx_tflite: - inference_time: 1375.0 - throughput: 727.2727272727273 + inference_time: 1334.0 + throughput: 749.6251874062968 estimated_peak_memory_range: - min: 24576 - max: 1884800 + min: 28672 + max: 1646912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1p3m7qzg + job_id: j1gle1xmp job_status: Passed torchscript_onnx_qnn: - inference_time: 998.0 - throughput: 1002.0040080160321 + inference_time: 1004.0 + throughput: 996.01593625498 estimated_peak_memory_range: - min: 212992 - max: 57987368 + min: 221184 + max: 10711856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: j7gjlyk8p + job_id: jwgoe4rkp job_status: Passed torchscript_onnx_ort: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1405.0 + throughput: 711.7437722419929 estimated_peak_memory_range: - min: 12288 - max: 67552416 + min: 212992 + max: 2559280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jmg9429v5 + job_id: jygzv4mxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.162704Z' + timestamp: '2024-06-11T11:57:52Z' - torchscript_onnx_tflite: - inference_time: 896.0 - throughput: 1116.0714285714287 + inference_time: 936.0 + throughput: 1068.3760683760684 estimated_peak_memory_range: min: 16384 - max: 18791088 + max: 19633600 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jwgovwed5 + job_id: jw56qd7yg job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 624.0 + throughput: 1602.5641025641025 estimated_peak_memory_range: - min: 0 - max: 13197104 + min: 208896 + max: 13403568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jlpevx405 + job_id: j1pvz9drg job_status: Passed torchscript_onnx_ort: - inference_time: 944.0 - throughput: 1059.322033898305 + inference_time: 949.0 + throughput: 1053.740779768177 estimated_peak_memory_range: min: 212992 - max: 12361728 + max: 12509200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jnp181qlg + job_id: jz5wm17mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.162746Z' + timestamp: '2024-06-11T11:57:53Z' - torchscript_onnx_tflite: - inference_time: 1357.0 - throughput: 736.9196757553427 + inference_time: 1319.0 + throughput: 758.1501137225171 estimated_peak_memory_range: - min: 28672 - max: 8270448 + min: 
20480 + max: 7876136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j1pvwmzmg + job_id: j1p3qw9n5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1019.0 - throughput: 981.3542688910696 + inference_time: 992.0 + throughput: 1008.0645161290323 estimated_peak_memory_range: - min: 221184 - max: 8704880 + min: 229376 + max: 12485448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jz5w9zmjp + job_id: jlpe4lzv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.162771Z' + timestamp: '2024-06-11T11:57:51Z' - torchscript_onnx_qnn: - inference_time: 1147.0 - throughput: 871.8395815170009 + inference_time: 1112.0 + throughput: 899.2805755395683 estimated_peak_memory_range: - min: 204800 - max: 204800 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 11 - job_id: jygz7yv6p + job_id: j7gjkw7e5 job_status: Passed torchscript_onnx_ort: - inference_time: 1432.0 - throughput: 698.3240223463687 + inference_time: 1419.0 + throughput: 704.7216349541931 estimated_peak_memory_range: - min: 9011200 - max: 9011200 + min: 8966144 + max: 8966144 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 13 - job_id: jvgdv47lg + job_id: jmg99xm8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.162795Z' + timestamp: '2024-06-11T11:57:53Z' diff --git a/qai_hub_models/models/quicksrnetsmall/test.py b/qai_hub_models/models/quicksrnetsmall/test.py index 87ccaee0..3e28e7d2 100644 --- a/qai_hub_models/models/quicksrnetsmall/test.py +++ b/qai_hub_models/models/quicksrnetsmall/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetsmall.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.quicksrnetsmall.demo import main as demo_main from qai_hub_models.models.quicksrnetsmall.model import ( MODEL_ASSET_VERSION, diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/README.md b/qai_hub_models/models/quicksrnetsmall_quantized/README.md index 7380e967..8573495f 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/README.md +++ b/qai_hub_models/models/quicksrnetsmall_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of QuickSRNetSmall-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms](https://arxiv.org/abs/2303.04336) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/demo.py b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py index cb2dcd45..8b411c0b 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/demo.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/demo.py @@ -4,22 +4,15 @@ # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo from qai_hub_models.models.quicksrnetsmall_quantized.model import ( - MODEL_ASSET_VERSION, MODEL_ID, QuickSRNetSmallQuantizable, ) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnet_demo.jpg" -) def main(is_test: bool = False): super_resolution_demo( QuickSRNetSmallQuantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, ) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/export.py b/qai_hub_models/models/quicksrnetsmall_quantized/export.py index 69aec297..48cf6baf 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/export.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/export.py @@ -195,7 +195,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml index fad05b98..37b32e7f 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/info.yaml @@ -13,13 +13,14 @@ research_paper: https://arxiv.org/abs/2303.04336 research_paper_title: 'QuickSRNet: Plain Single-Image Super-Resolution Architecture for Faster Inference on Mobile Platforms' license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/quicksrnet technical_details: - Model checkpoint: quicksrnet_small_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 33.3K - Model size: 42.5 KB + Model checkpoint: quicksrnet_small_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 27.2K + Model size: 34.9 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/model.py b/qai_hub_models/models/quicksrnetsmall_quantized/model.py index 57c495a8..9b1c83a6 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/model.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/model.py 
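Each regenerated export.py in this change widens the ONNX branch of the artifact-extension switch so the new `PRECOMPILED_ORT` target also produces a `.onnx` file. Below is a standalone sketch of that selection logic; the `TargetRuntime` enum here is a stand-in (only the members named in the hunks are real, and the condition guarding the `.so` branch is assumed to be the QNN runtime since it sits outside the visible context lines).

```python
from enum import Enum, auto


class TargetRuntime(Enum):
    # Stand-in for the qai_hub_models TargetRuntime enum; member names mirror
    # the ones referenced in the export.py hunks, values are illustrative.
    QNN = auto()
    TFLITE = auto()
    ORT = auto()
    PRECOMPILED_ORT = auto()


def target_runtime_extension(target_runtime: TargetRuntime) -> str:
    """Pick the downloaded artifact's file extension for a target runtime."""
    if target_runtime == TargetRuntime.QNN:  # assumed branch; not shown in the diff context
        return "so"
    elif target_runtime == TargetRuntime.TFLITE:
        return "tflite"
    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
        # Updated in this change: precompiled ONNX Runtime artifacts are also .onnx files.
        return "onnx"
    raise ValueError(f"Unsupported target runtime: {target_runtime}")
```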
@@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.quicksrnetsmall.model import QuickSRNetSmall from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -26,7 +26,6 @@ MODEL_ASSET_VERSION = 4 DEFAULT_ENCODINGS = "quicksrnetsmall_quantized_encodings.json" -SCALING_FACTOR = 4 class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): @@ -37,13 +36,16 @@ class QuickSRNetSmallQuantizable(AIMETQuantizableMixin, QuickSRNetSmall): def __init__( self, quicksrnet_model: QuantizationSimModel, + scale_factor: int, ) -> None: - QuickSRNetSmall.__init__(self, quicksrnet_model.model) + QuickSRNetSmall.__init__(self, quicksrnet_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, quicksrnet_model) @classmethod def from_pretrained( - cls, aimet_encodings: str | None = "DEFAULT" + cls, + aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> "QuickSRNetSmallQuantizable": """ Parameters: @@ -53,7 +55,7 @@ def from_pretrained( else: Interprets as a filepath and loads the encodings stored there. """ # Load Model - fp16_model = QuickSRNetSmall.from_pretrained() + fp16_model = QuickSRNetSmall.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) equalize_model(model, input_shape) @@ -76,4 +78,4 @@ def from_pretrained( sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml index c846aab6..babfa73c 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml +++ b/qai_hub_models/models/quicksrnetsmall_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: QuickSRNetSmall-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 950.0 - throughput: 1052.6315789473683 + inference_time: 974.0 + throughput: 1026.694045174538 estimated_peak_memory_range: - min: 65536 - max: 1644392 + min: 12288 + max: 2677152 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jqp4w48lg + job_id: jvgd7z3zg job_status: Passed torchscript_onnx_qnn: - inference_time: 668.0 - throughput: 1497.005988023952 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: - min: 16384 - max: 2183568 + min: 65536 + max: 3287624 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jep2mdjm5 + job_id: jegnr7nq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1092.0 - throughput: 915.7509157509157 + inference_time: 691.0 + throughput: 1447.178002894356 estimated_peak_memory_range: - min: 212992 - max: 2472616 + min: 53248 + max: 3206304 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jn5q21wm5 + total_layers: 12 + job_id: j2p0evjn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: 
Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.196900Z' + timestamp: '2024-06-11T11:58:18Z' - torchscript_onnx_tflite: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 793.0 + throughput: 1261.034047919294 estimated_peak_memory_range: - min: 12288 - max: 18203808 + min: 16384 + max: 19014000 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: j0px1rm9g + job_id: jz57v7495 job_status: Passed torchscript_onnx_qnn: - inference_time: 450.0 - throughput: 2222.222222222222 + inference_time: 458.0 + throughput: 2183.406113537118 estimated_peak_memory_range: - min: 65536 - max: 13513920 + min: 81920 + max: 14588544 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: jqpyd2n4p + job_id: jopr1n07g job_status: Passed torchscript_onnx_ort: - inference_time: 845.0 - throughput: 1183.4319526627219 + inference_time: 553.0 + throughput: 1808.3182640144666 estimated_peak_memory_range: - min: 12288 - max: 13152976 + min: 65536 + max: 10727936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1glk87lp + total_layers: 12 + job_id: j1p8w4xop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.196939Z' + timestamp: '2024-06-11T11:58:19Z' - torchscript_onnx_tflite: - inference_time: 987.0 - throughput: 1013.1712259371834 + inference_time: 960.0 + throughput: 1041.6666666666667 estimated_peak_memory_range: - min: 24576 - max: 1367976 + min: 20480 + max: 1503368 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jo5mzk4qp + job_id: jqp4j911p job_status: Passed torchscript_onnx_qnn: inference_time: 672.0 throughput: 1488.095238095238 estimated_peak_memory_range: - min: 28672 - max: 3499584 + min: 12288 + max: 47001808 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: j1p87r885 + job_id: jqpyv7xlp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.196966Z' + timestamp: '2024-06-11T11:58:17Z' - torchscript_onnx_tflite: - inference_time: 2693.0 - throughput: 371.3330857779428 + inference_time: 1754.0 + throughput: 570.1254275940707 estimated_peak_memory_range: min: 12288 - max: 13952064 + max: 13580528 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jegneqxmg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1247.0 - throughput: 801.924619085806 - estimated_peak_memory_range: - min: 12288 - max: 13441264 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 8 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 8 - job_id: jogky0dop + job_id: j0pxed4l5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.196990Z' + timestamp: '2024-06-11T11:58:12Z' - 
torchscript_onnx_tflite: - inference_time: 6002.0 - throughput: 166.61112962345885 + inference_time: 5837.0 + throughput: 171.32088401576152 estimated_peak_memory_range: - min: 3334144 - max: 11969392 + min: 249856 + max: 7133040 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 11 - job_id: jopryd9eg + job_id: jo5mvdm95 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.197005Z' + timestamp: '2024-06-11T11:58:13Z' - torchscript_onnx_qnn: - inference_time: 740.0 - throughput: 1351.3513513513512 + inference_time: 718.0 + throughput: 1392.757660167131 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 1077248 + max: 1077248 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 8 - job_id: j2p0r9kep + job_id: jep23vwqg job_status: Passed torchscript_onnx_ort: - inference_time: 1038.0 - throughput: 963.3911368015414 + inference_time: 698.0 + throughput: 1432.6647564469913 estimated_peak_memory_range: - min: 8916992 - max: 8916992 + min: 7000064 + max: 7000064 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 12 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jw561mv7p + total_layers: 12 + job_id: jogkr94n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.197032Z' + timestamp: '2024-06-11T11:58:20Z' diff --git a/qai_hub_models/models/quicksrnetsmall_quantized/test.py b/qai_hub_models/models/quicksrnetsmall_quantized/test.py index b23accfd..08fe4cf3 100644 --- a/qai_hub_models/models/quicksrnetsmall_quantized/test.py +++ b/qai_hub_models/models/quicksrnetsmall_quantized/test.py @@ -2,30 +2,23 @@ # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- -import os -import zipfile import numpy as np import pytest import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.quicksrnetsmall_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.quicksrnetsmall.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.quicksrnetsmall_quantized.demo import main as demo_main from qai_hub_models.models.quicksrnetsmall_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, QuickSRNetSmallQuantizable, ) -from qai_hub_models.utils.asset_loaders import ( - CachedWebModelAsset, - load_image, - qaihm_temp_dir, -) +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_quantized_output.png" + MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo_output.png" ) @@ -67,24 +60,6 @@ def test_trace(): ) -@pytest.mark.skip("https://github.com/tetraai/tetracode/issues/9606") -@skip_clone_repo_check -def test_aimet_export(): - model = QuickSRNetSmallQuantizable.from_pretrained() - name = model.__class__.__name__ - with qaihm_temp_dir() as tmpdir: - output_zip = model.convert_to_onnx_and_aimet_encodings( - tmpdir, - ) - assert os.path.exists(output_zip) - with zipfile.ZipFile(output_zip, "r") as zip: - assert zip.namelist() == [ - f"{name}.aimet/", - f"{name}.aimet/{name}.onnx", - f"{name}.aimet/{name}.encodings", - ] - - @skip_clone_repo_check def test_demo(): demo_main(is_test=True) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/README.md b/qai_hub_models/models/real_esrgan_general_x4v3/README.md index c25f5606..11cbbee5 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/README.md +++ b/qai_hub_models/models/real_esrgan_general_x4v3/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-General-x4v3 can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). 
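The test modules in this change (see the hunk above and the matching ones for the other QuickSRNet variants) stop defining per-model demo images: the input now comes from the shared super-resolution demo, and the reference output is versioned under the corresponding floating-point model's asset-store entry. A small sketch of how those two assets are resolved, using only the imports and calls visible in the hunks:

```python
from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS
from qai_hub_models.models.quicksrnetsmall.model import MODEL_ASSET_VERSION, MODEL_ID
from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image

# Reference output for the quantized test, stored under the base (float) model's
# asset namespace rather than the *_quantized module's own MODEL_ID.
OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
    MODEL_ID, MODEL_ASSET_VERSION, "quicksrnetsmall_demo_output.png"
)

# Both assets are fetched from the asset store (and cached locally) on first use.
input_image = load_image(IMAGE_ADDRESS)
expected_image = load_image(OUTPUT_IMAGE_ADDRESS)
```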
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/export.py b/qai_hub_models/models/real_esrgan_general_x4v3/export.py index 588420bb..4aa9e1fd 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/export.py +++ b/qai_hub_models/models/real_esrgan_general_x4v3/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml index 5a0a2c77..6f9afa6c 100644 --- a/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml +++ b/qai_hub_models/models/real_esrgan_general_x4v3/perf.yaml @@ -36,11 +36,11 @@ models: - name: Real-ESRGAN-General-x4v3 performance_metrics: - torchscript_onnx_tflite: - inference_time: 7495.0 - throughput: 133.422281521014 + inference_time: 7261.0 + throughput: 137.72207684891887 estimated_peak_memory_range: - min: 15757312 - max: 17670976 + min: 17604608 + max: 25105264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: jwgovwmd5 + job_id: jmg994wwg job_status: Passed torchscript_onnx_qnn: - inference_time: 6301.0 - throughput: 158.70496746548167 + inference_time: 6295.0 + throughput: 158.85623510722795 estimated_peak_memory_range: - min: 20480 - max: 113174448 + min: 221184 + max: 4921640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jlpevx205 + job_id: jz57vdxv5 job_status: Passed torchscript_onnx_ort: - inference_time: 6839.0 - throughput: 146.22020763269484 + inference_time: 6938.0 + throughput: 144.13375612568464 estimated_peak_memory_range: min: 6332416 - max: 18024640 + max: 55155560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jvgdv40lg + job_id: jegnre3k5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.240474Z' + timestamp: '2024-06-08T22:59:37Z' - torchscript_onnx_tflite: - inference_time: 5502.0 - throughput: 181.75209014903672 + inference_time: 5588.0 + throughput: 178.9549033643522 estimated_peak_memory_range: - min: 16384 - max: 54712192 + min: 20480 + max: 56093568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j1pvwm4mg + job_id: jnp1q8e8g job_status: Passed torchscript_onnx_qnn: - inference_time: 4595.0 - throughput: 217.6278563656148 + inference_time: 4604.0 + throughput: 217.2024326672459 estimated_peak_memory_range: - min: 0 - max: 34446896 + min: 208896 + max: 37726496 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jygz7yw6p + job_id: jqp4jwv8p job_status: Passed torchscript_onnx_ort: - inference_time: 5168.0 - throughput: 193.4984520123839 + inference_time: 5181.0 + throughput: 193.01293186643505 estimated_peak_memory_range: min: 2310144 - max: 34782944 + max: 36289552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jz5w9z86p + job_id: jopr1ye0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.240532Z' + timestamp: '2024-06-08T22:59:38Z' - torchscript_onnx_tflite: - inference_time: 7428.0 - throughput: 134.62574044157242 + inference_time: 7376.0 + throughput: 135.57483731019522 estimated_peak_memory_range: - min: 6606848 - max: 15301872 + min: 6377472 + max: 7904672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 72 - job_id: j7gjly18p + job_id: jvgd7vorg job_status: Passed torchscript_onnx_qnn: - inference_time: 6290.0 - throughput: 158.9825119236884 + inference_time: 6271.0 + throughput: 159.46420028703557 estimated_peak_memory_range: - min: 20480 - max: 8513144 + min: 131072 + max: 5213032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jnp1813lg + job_id: jo5mvz3d5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.240569Z' + timestamp: '2024-06-08T22:59:36Z' - torchscript_onnx_qnn: - inference_time: 9188.0 - throughput: 108.837614279495 + inference_time: 8670.0 + throughput: 115.34025374855824 estimated_peak_memory_range: - min: 229376 - max: 229376 + min: 208896 + max: 208896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 72 - job_id: jz5w9zxjp + job_id: j0pxe1y35 job_status: Passed torchscript_onnx_ort: - inference_time: 7242.0 - throughput: 138.08340237503452 + inference_time: 7041.0 + throughput: 142.02528049992898 estimated_peak_memory_range: - min: 8654848 - max: 8654848 + min: 8646656 + max: 8646656 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 74 - job_id: jmg942kl5 + job_id: jep23mlrg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.240608Z' + timestamp: '2024-06-08T22:59:39Z' diff --git a/qai_hub_models/models/real_esrgan_x4plus/README.md b/qai_hub_models/models/real_esrgan_x4plus/README.md index 89551a63..3c6db231 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/README.md +++ b/qai_hub_models/models/real_esrgan_x4plus/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Real-ESRGAN-x4plus can be found [here](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data](https://arxiv.org/abs/2107.10833) diff --git a/qai_hub_models/models/real_esrgan_x4plus/export.py b/qai_hub_models/models/real_esrgan_x4plus/export.py index 33718c26..ab0454e1 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/export.py +++ b/qai_hub_models/models/real_esrgan_x4plus/export.py @@ -172,7 +172,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml index 4a878b9f..c2ee5598 100644 --- a/qai_hub_models/models/real_esrgan_x4plus/perf.yaml +++ b/qai_hub_models/models/real_esrgan_x4plus/perf.yaml @@ -36,11 +36,11 @@ models: - name: Real-ESRGAN-x4plus performance_metrics: - torchscript_onnx_tflite: - inference_time: 68352.0 - throughput: 14.630149812734082 + inference_time: 71761.0 + throughput: 13.935145831301124 estimated_peak_memory_range: - min: 3293184 - max: 6645752 + min: 4210688 + max: 13102152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jvgdv48eg + job_id: j2p0erl95 job_status: Passed torchscript_onnx_qnn: - inference_time: 65610.0 - throughput: 15.241579027587258 + inference_time: 70398.0 + throughput: 14.204949004233075 estimated_peak_memory_range: - min: 118784 - max: 55504888 + min: 12288 + max: 106397920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: j0px1r31g + job_id: jn5q923np job_status: Passed torchscript_onnx_ort: - inference_time: 70866.0 - throughput: 14.111139333389778 + inference_time: 65953.0 + throughput: 15.162312555911027 estimated_peak_memory_range: - min: 0 - max: 139023520 + min: 6344704 + max: 155593192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jep2md445 + job_id: jwgoev3qp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.274898Z' + timestamp: '2024-06-08T23:00:20Z' - torchscript_onnx_tflite: - inference_time: 53158.0 - throughput: 18.8118439369427 + inference_time: 52163.0 + throughput: 19.170676533174856 estimated_peak_memory_range: - min: 3264512 - max: 586204928 + min: 77824 + max: 586842272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jz57dnkl5 + job_id: j1p8w7zkp job_status: Passed torchscript_onnx_qnn: - inference_time: 50734.0 - throughput: 19.710647691883157 + inference_time: 50801.0 + throughput: 19.684651876931557 estimated_peak_memory_range: - min: 69632 - max: 262891120 + min: 102400 + max: 264449376 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: jo5mzkowp + job_id: j1glek3jp job_status: Passed torchscript_onnx_ort: - inference_time: 52204.0 - throughput: 19.155620258983987 + inference_time: 51691.0 + throughput: 19.34572749608249 estimated_peak_memory_range: - min: 6447104 - max: 191489024 + min: 6029312 + max: 190175536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jqpyd2q7p + job_id: j1pvzwwkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.275248Z' + timestamp: '2024-06-08T23:00:21Z' - torchscript_onnx_tflite: - inference_time: 65521.0 - throughput: 15.262282321698386 + inference_time: 67995.0 + throughput: 14.706963747334363 estimated_peak_memory_range: - min: 3284992 - max: 6005408 + min: 1552384 + max: 4034000 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1028 - job_id: jqp4w4mvg + job_id: jogkry3w5 job_status: Passed torchscript_onnx_qnn: - inference_time: 66374.0 - throughput: 15.066140356163558 + inference_time: 69414.0 + throughput: 14.406315728815512 estimated_peak_memory_range: - min: 131072 - max: 55641936 + min: 0 + max: 56605216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1031 - job_id: joprydo9g + job_id: j1p3qme35 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.275481Z' + timestamp: '2024-06-08T23:00:19Z' - torchscript_onnx_qnn: - inference_time: 73922.0 - throughput: 13.527772516977354 + inference_time: 73906.0 + throughput: 13.530701160934159 estimated_peak_memory_range: min: 212992 max: 212992 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: jegneqorg + job_id: jw56q1n6g job_status: Passed torchscript_onnx_ort: - inference_time: 65841.0 - throughput: 15.18810467641743 + inference_time: 65787.0 + throughput: 15.20057154148996 estimated_peak_memory_range: - min: 1998848 - max: 1998848 + min: 233472 + max: 233472 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1030 - job_id: j2p0r9d6p + job_id: j7gjkllv5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.275721Z' + timestamp: '2024-06-08T23:00:22Z' diff --git a/qai_hub_models/models/regnet/README.md b/qai_hub_models/models/regnet/README.md index 96c82923..3caff192 100644 --- a/qai_hub_models/models/regnet/README.md +++ b/qai_hub_models/models/regnet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of RegNet can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678) diff --git a/qai_hub_models/models/regnet/evaluate.py b/qai_hub_models/models/regnet/evaluate.py new file mode 100644 index 00000000..7f135792 --- /dev/null +++ b/qai_hub_models/models/regnet/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.regnet import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/regnet/export.py b/qai_hub_models/models/regnet/export.py index ab9a56b7..48731313 100644 --- a/qai_hub_models/models/regnet/export.py +++ b/qai_hub_models/models/regnet/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/regnet/info.yaml b/qai_hub_models/models/regnet/info.yaml index 863f909d..19bc7cde 100644 --- a/qai_hub_models/models/regnet/info.yaml +++ b/qai_hub_models/models/regnet/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/regnet/perf.yaml b/qai_hub_models/models/regnet/perf.yaml index 77a8aad9..6dbdb072 100644 --- a/qai_hub_models/models/regnet/perf.yaml +++ b/qai_hub_models/models/regnet/perf.yaml @@ -36,11 +36,11 @@ models: - name: RegNet performance_metrics: - torchscript_onnx_tflite: - inference_time: 2323.0 - throughput: 430.4778303917348 + inference_time: 2344.0 + throughput: 426.6211604095563 estimated_peak_memory_range: - min: 24576 - max: 2684712 + min: 40960 + max: 2564000 primary_compute_unit: NPU precision: 
fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jogky0o2p + job_id: jygzv77op job_status: Passed torchscript_onnx_qnn: - inference_time: 2114.0 - throughput: 473.0368968779565 + inference_time: 2105.0 + throughput: 475.05938242280286 estimated_peak_memory_range: min: 16384 - max: 76878488 + max: 66214464 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jw561mr0p + job_id: jnp1q888g job_status: Passed torchscript_onnx_ort: - inference_time: 2340.0 - throughput: 427.35042735042737 + inference_time: 2313.0 + throughput: 432.33895373973195 estimated_peak_memory_range: - min: 12288 - max: 74190896 + min: 16384 + max: 109504192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: j7gjlyoxp + job_id: jnp1q887g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.309964Z' + timestamp: '2024-06-08T23:00:51Z' - torchscript_onnx_tflite: - inference_time: 1626.0 - throughput: 615.0061500615006 + inference_time: 1623.0 + throughput: 616.1429451632779 estimated_peak_memory_range: - min: 12288 - max: 133396720 + min: 16384 + max: 137911392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: jn5q21z45 + job_id: jz5wm993g job_status: Passed torchscript_onnx_qnn: - inference_time: 1479.0 - throughput: 676.132521974307 + inference_time: 1494.0 + throughput: 669.3440428380187 estimated_peak_memory_range: min: 618496 - max: 74924816 + max: 75619760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j1p3m7xlg + job_id: jvgd7vvrg job_status: Passed torchscript_onnx_ort: - inference_time: 1679.0 - throughput: 595.5926146515783 + inference_time: 1637.0 + throughput: 610.8735491753207 estimated_peak_memory_range: min: 0 - max: 39651088 + max: 37581584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jlpevx815 + job_id: jvgd7vvzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.310050Z' + timestamp: '2024-06-08T23:00:52Z' - torchscript_onnx_tflite: - inference_time: 2328.0 - throughput: 429.553264604811 + inference_time: 2318.0 + throughput: 431.40638481449525 estimated_peak_memory_range: - min: 24576 - max: 2351904 + min: 16384 + max: 2479152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 114 - job_id: j1glk8o8p + job_id: jmg9944wg job_status: Passed torchscript_onnx_qnn: - inference_time: 2110.0 - throughput: 473.93364928909955 + inference_time: 2102.0 + throughput: 475.7373929590866 estimated_peak_memory_range: - min: 16384 - max: 65823536 + min: 12288 + max: 14056768 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: j1pvwmejg + job_id: jmg99448g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ 
models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.310103Z' + timestamp: '2024-06-08T23:00:50Z' - torchscript_onnx_qnn: - inference_time: 2509.0 - throughput: 398.5651654045436 + inference_time: 2475.0 + throughput: 404.04040404040404 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 188 - job_id: jwgovwox5 + job_id: jz5wm99mg job_status: Passed torchscript_onnx_ort: - inference_time: 2177.0 - throughput: 459.34772622875516 + inference_time: 2215.0 + throughput: 451.46726862302484 estimated_peak_memory_range: - min: 37945344 - max: 37945344 + min: 651264 + max: 651264 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 190 - job_id: jygz7y8kp + job_id: jz57vdd95 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.310167Z' + timestamp: '2024-06-08T23:00:53Z' diff --git a/qai_hub_models/models/resnet101/README.md b/qai_hub_models/models/resnet101/README.md index 218a6131..3557c576 100644 --- a/qai_hub_models/models/resnet101/README.md +++ b/qai_hub_models/models/resnet101/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101/evaluate.py b/qai_hub_models/models/resnet101/evaluate.py new file mode 100644 index 00000000..9f91113a --- /dev/null +++ b/qai_hub_models/models/resnet101/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet101 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet101/export.py b/qai_hub_models/models/resnet101/export.py index 82072425..337029e4 100644 --- a/qai_hub_models/models/resnet101/export.py +++ b/qai_hub_models/models/resnet101/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101/info.yaml b/qai_hub_models/models/resnet101/info.yaml index f1410fb4..abbc258b 100644 --- a/qai_hub_models/models/resnet101/info.yaml +++ b/qai_hub_models/models/resnet101/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet101/perf.yaml b/qai_hub_models/models/resnet101/perf.yaml index 1fbddfb5..c30b049f 100644 --- a/qai_hub_models/models/resnet101/perf.yaml +++ b/qai_hub_models/models/resnet101/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 3398.0 - throughput: 294.2907592701589 + inference_time: 3383.0 + throughput: 295.5956251847473 estimated_peak_memory_range: min: 16384 - max: 2088632 + max: 2493664 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jn5q21m45 + job_id: j1pvzw1rg job_status: Passed torchscript_onnx_qnn: - inference_time: 3467.0 - throughput: 288.43380444188057 + inference_time: 3448.0 + throughput: 290.0232018561485 estimated_peak_memory_range: - min: 618496 - max: 173749104 + min: 16384 + max: 173843416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p3m7wlg + job_id: jygzv7xxp job_status: Passed torchscript_onnx_ort: - inference_time: 3578.0 - throughput: 279.4857462269424 + inference_time: 3614.0 + throughput: 276.70171555063644 estimated_peak_memory_range: - min: 16384 - max: 233232480 + min: 45056 + max: 355647168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 247 - job_id: jlpevxl15 + job_id: jvgd7vrzg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.388058Z' + timestamp: '2024-06-08T23:02:27Z' - torchscript_onnx_tflite: - inference_time: 2447.0 - throughput: 408.6636697997548 + inference_time: 2440.0 + throughput: 409.8360655737705 estimated_peak_memory_range: - min: 12288 - max: 107056384 + min: 16384 + max: 109471344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: j1glk818p + job_id: j7gjkl0e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2520.0 - throughput: 396.8253968253968 + inference_time: 2514.0 + throughput: 397.77247414478916 estimated_peak_memory_range: min: 618496 - max: 83774768 + max: 81083536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jwgovw4x5 + job_id: jz5wm9dmg job_status: Passed torchscript_onnx_ort: - inference_time: 2587.0 - throughput: 386.5481252415926 + inference_time: 2575.0 + throughput: 388.3495145631068 estimated_peak_memory_range: min: 618496 - max: 48055616 + max: 46866960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jygz7y4kp + job_id: jz57vdj95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.388157Z' + timestamp: '2024-06-08T23:02:28Z' - torchscript_onnx_tflite: - inference_time: 3376.0 - throughput: 296.2085308056872 + inference_time: 3388.0 + throughput: 295.159386068477 estimated_peak_memory_range: - min: 45056 - max: 2154704 + min: 28672 + max: 1888064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jw561md0p + job_id: jlpe4vrv5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3465.0 - throughput: 288.6002886002886 + inference_time: 3458.0 + throughput: 289.1844997108155 estimated_peak_memory_range: - min: 618496 - max: 163344000 + min: 626688 + max: 163514888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j7gjlywxp + job_id: jnp1q8d7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.388219Z' + timestamp: '2024-06-08T23:02:26Z' - torchscript_onnx_qnn: - inference_time: 4026.0 - throughput: 248.38549428713364 + inference_time: 4024.0 + throughput: 248.5089463220676 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1011712 + max: 1011712 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1pvwm9jg + job_id: jmg99438g job_status: Passed torchscript_onnx_ort: - inference_time: 3496.0 - throughput: 286.0411899313501 + inference_time: 3536.0 + throughput: 282.80542986425337 estimated_peak_memory_range: - min: 43966464 - max: 43966464 + min: 43122688 + max: 43122688 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jz5w9z46p + job_id: jqp4jwx1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.388290Z' + timestamp: '2024-06-08T23:02:29Z' diff --git a/qai_hub_models/models/resnet101_quantized/README.md b/qai_hub_models/models/resnet101_quantized/README.md index 0d1ec373..4c46a553 100644 --- a/qai_hub_models/models/resnet101_quantized/README.md +++ b/qai_hub_models/models/resnet101_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet101_quantized/evaluate.py b/qai_hub_models/models/resnet101_quantized/evaluate.py new file mode 100644 index 00000000..fde921e3 --- /dev/null +++ b/qai_hub_models/models/resnet101_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet101_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet101_quantized/export.py b/qai_hub_models/models/resnet101_quantized/export.py index feb84bd3..109a3790 100644 --- a/qai_hub_models/models/resnet101_quantized/export.py +++ b/qai_hub_models/models/resnet101_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet101_quantized/info.yaml b/qai_hub_models/models/resnet101_quantized/info.yaml index e25f53d7..2da5c904 100644 --- a/qai_hub_models/models/resnet101_quantized/info.yaml +++ b/qai_hub_models/models/resnet101_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet101_quantized/perf.yaml b/qai_hub_models/models/resnet101_quantized/perf.yaml index 6dea8107..9bf5bafe 100644 --- a/qai_hub_models/models/resnet101_quantized/perf.yaml +++ b/qai_hub_models/models/resnet101_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNet101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1181.0 - throughput: 846.740050804403 + inference_time: 1188.0 + throughput: 841.7508417508418 estimated_peak_memory_range: - min: 16384 - max: 1842712 + min: 0 + max: 1614400 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jvgdv42eg + job_id: jo5mvzw95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1388.0 - throughput: 720.4610951008646 + inference_time: 1377.0 + throughput: 726.2164124909223 estimated_peak_memory_range: - min: 16384 - max: 110788816 + min: 12288 + max: 58349752 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: joprydw9g + job_id: j2p0er1n5 job_status: Passed torchscript_onnx_ort: - inference_time: 1548.0 - throughput: 645.9948320413437 + inference_time: 1486.0 + throughput: 672.9475100942127 estimated_peak_memory_range: min: 12288 - max: 152332600 + max: 87121872 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jogky082p + total_layers: 151 + job_id: j1glekemp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.422642Z' + timestamp: '2024-06-08T23:04:23Z' - torchscript_onnx_tflite: - inference_time: 936.0 - throughput: 1068.3760683760684 + inference_time: 927.0 + throughput: 1078.7486515641856 estimated_peak_memory_range: min: 12288 - max: 92834304 + max: 93411600 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jz57dn9l5 + job_id: jegnre9q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1060.0 - throughput: 943.3962264150944 + inference_time: 1078.0 + throughput: 927.643784786642 estimated_peak_memory_range: - min: 167936 - max: 65745760 + min: 163840 + max: 66249856 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jep2mde45 + job_id: j1p8w73op job_status: Passed torchscript_onnx_ort: - inference_time: 1252.0 - throughput: 798.7220447284345 + inference_time: 1162.0 + throughput: 860.5851979345955 estimated_peak_memory_range: min: 0 - max: 43619040 + max: 47460512 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jn5q21v45 + total_layers: 151 + job_id: jw56q1qyg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.422723Z' + timestamp: '2024-06-08T23:04:24Z' - torchscript_onnx_tflite: - inference_time: 1182.0 - throughput: 846.0236886632825 + inference_time: 1171.0 + throughput: 853.9709649871904 estimated_peak_memory_range: - min: 32768 - max: 1724248 + min: 12288 + max: 1692848 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jqp4w43vg + job_id: jopr1y47g job_status: Passed torchscript_onnx_qnn: - inference_time: 1371.0 - throughput: 729.3946024799417 + inference_time: 1379.0 + throughput: 725.1631617113851 estimated_peak_memory_range: min: 12288 - max: 113245424 + max: 47947408 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: j2p0r966p + job_id: jn5q927op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.422775Z' + timestamp: '2024-06-08T23:04:22Z' - torchscript_onnx_tflite: - inference_time: 4715.0 - throughput: 212.08907741251326 + inference_time: 4690.0 + throughput: 213.21961620469082 estimated_peak_memory_range: - min: 49152 - max: 27288944 + min: 12288 + max: 30183472 
primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j0px1rx1g - job_status: Passed - torchscript_onnx_qnn: - inference_time: 5203.0 - throughput: 192.19680953296177 - estimated_peak_memory_range: - min: 163840 - max: 60211840 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 146 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 146 - job_id: j1p87r1x5 + job_id: jep23m7qg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.422828Z' + timestamp: '2024-06-08T23:04:17Z' - torchscript_onnx_tflite: - inference_time: 17085.0 - throughput: 58.5308750365818 + inference_time: 17058.0 + throughput: 58.62351975612616 estimated_peak_memory_range: - min: 12288 - max: 2437568 + min: 40960 + max: 1956688 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jegneqkrg + job_id: jqpyvd4lp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.422856Z' + timestamp: '2024-06-08T23:04:18Z' - torchscript_onnx_qnn: - inference_time: 1424.0 - throughput: 702.2471910112359 + inference_time: 1381.0 + throughput: 724.112961622013 estimated_peak_memory_range: - min: 495616 - max: 495616 + min: 270336 + max: 270336 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jqpyd2m7p + job_id: jogkryln5 job_status: Passed torchscript_onnx_ort: - inference_time: 1429.0 - throughput: 699.7900629811056 + inference_time: 1313.0 + throughput: 761.6146230007616 estimated_peak_memory_range: - min: 50556928 - max: 50556928 + min: 24576 + max: 24576 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: j1glk8l8p + total_layers: 151 + job_id: j1p3qmqn5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.422910Z' + timestamp: '2024-06-08T23:04:25Z' diff --git a/qai_hub_models/models/resnet18/README.md b/qai_hub_models/models/resnet18/README.md index de48498f..2b9ced95 100644 --- a/qai_hub_models/models/resnet18/README.md +++ b/qai_hub_models/models/resnet18/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18/evaluate.py b/qai_hub_models/models/resnet18/evaluate.py new file mode 100644 index 00000000..129a0a90 --- /dev/null +++ b/qai_hub_models/models/resnet18/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet18 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet18/export.py b/qai_hub_models/models/resnet18/export.py index 0bcf2350..b4ca9324 100644 --- a/qai_hub_models/models/resnet18/export.py +++ b/qai_hub_models/models/resnet18/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18/info.yaml b/qai_hub_models/models/resnet18/info.yaml index 8b06179f..9ddcdde0 100644 --- a/qai_hub_models/models/resnet18/info.yaml +++ b/qai_hub_models/models/resnet18/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet18/perf.yaml b/qai_hub_models/models/resnet18/perf.yaml index 7e1fee47..ff6f698a 100644 --- a/qai_hub_models/models/resnet18/perf.yaml +++ b/qai_hub_models/models/resnet18/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet18 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1408.0 - throughput: 710.2272727272727 + inference_time: 1416.0 + throughput: 706.2146892655368 estimated_peak_memory_range: - min: 57344 - max: 1345984 + min: 61440 + 
max: 1999640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1p3m76lg + job_id: j1pvzwzrg job_status: Passed torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1446.0 + throughput: 691.5629322268327 estimated_peak_memory_range: - min: 135168 - max: 83377024 + min: 86016 + max: 83516488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j7gjlyqxp + job_id: jygzv7vxp job_status: Passed torchscript_onnx_ort: - inference_time: 1383.0 - throughput: 723.0657989877079 + inference_time: 1350.0 + throughput: 740.7407407407408 estimated_peak_memory_range: - min: 12288 - max: 101086224 + min: 24576 + max: 88328320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jnp181j2g + job_id: jvgd7v7zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.466422Z' + timestamp: '2024-06-08T23:04:50Z' - torchscript_onnx_tflite: - inference_time: 980.0 - throughput: 1020.4081632653061 + inference_time: 989.0 + throughput: 1011.1223458038422 estimated_peak_memory_range: min: 12288 - max: 24571312 + max: 25458704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: jwgovw8x5 + job_id: j7gjklke5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1011.0 - throughput: 989.1196834817013 + inference_time: 1017.0 + throughput: 983.284169124877 estimated_peak_memory_range: min: 618496 - max: 27709520 + max: 29899792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jlpevxy15 + job_id: jz5wm9mmg job_status: Passed torchscript_onnx_ort: - inference_time: 982.0 - throughput: 1018.3299389002036 + inference_time: 978.0 + throughput: 1022.4948875255624 estimated_peak_memory_range: min: 0 - max: 17788432 + max: 16899936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jvgdv43eg + job_id: jz57vdv95 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.466474Z' + timestamp: '2024-06-08T23:04:51Z' - torchscript_onnx_tflite: - inference_time: 1394.0 - throughput: 717.3601147776184 + inference_time: 1412.0 + throughput: 708.2152974504249 estimated_peak_memory_range: min: 28672 - max: 1904504 + max: 154269408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 38 - job_id: j1pvwm7jg + job_id: jlpe4v4v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1469.0 - throughput: 680.7351940095303 + inference_time: 1473.0 + throughput: 678.8866259334691 estimated_peak_memory_range: min: 16384 - max: 83829944 + max: 72911032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jmg942ml5 + job_id: jnp1q8q7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 
+178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.466506Z' + timestamp: '2024-06-08T23:04:49Z' - torchscript_onnx_qnn: - inference_time: 1561.0 - throughput: 640.6149903907751 + inference_time: 1575.0 + throughput: 634.9206349206349 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jz5w9z76p + job_id: jmg99498g job_status: Passed torchscript_onnx_ort: - inference_time: 1312.0 - throughput: 762.1951219512196 + inference_time: 1324.0 + throughput: 755.2870090634441 estimated_peak_memory_range: - min: 32391168 - max: 32391168 + min: 28278784 + max: 28278784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 55 - job_id: jz57dn4l5 + job_id: jqp4jwj1p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.466542Z' + timestamp: '2024-06-08T23:04:52Z' diff --git a/qai_hub_models/models/resnet18_quantized/README.md b/qai_hub_models/models/resnet18_quantized/README.md index 12cd6d77..266febea 100644 --- a/qai_hub_models/models/resnet18_quantized/README.md +++ b/qai_hub_models/models/resnet18_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet18Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet18_quantized/evaluate.py b/qai_hub_models/models/resnet18_quantized/evaluate.py new file mode 100644 index 00000000..d98aec44 --- /dev/null +++ b/qai_hub_models/models/resnet18_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet18_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet18_quantized/export.py b/qai_hub_models/models/resnet18_quantized/export.py index 22973f71..4ed971cc 100644 --- a/qai_hub_models/models/resnet18_quantized/export.py +++ b/qai_hub_models/models/resnet18_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet18_quantized/info.yaml b/qai_hub_models/models/resnet18_quantized/info.yaml index f132fa42..c9d5ab07 100644 --- a/qai_hub_models/models/resnet18_quantized/info.yaml +++ b/qai_hub_models/models/resnet18_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet18_quantized/perf.yaml b/qai_hub_models/models/resnet18_quantized/perf.yaml index 6c6466e8..a76fba6f 100644 --- a/qai_hub_models/models/resnet18_quantized/perf.yaml +++ b/qai_hub_models/models/resnet18_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNet18Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 433.0 - throughput: 2309.4688221709007 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 12288 - max: 1345424 + max: 1492608 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jopryd09g + job_id: jo5mvzv95 job_status: Passed torchscript_onnx_qnn: - inference_time: 626.0 - throughput: 1597.444089456869 + inference_time: 639.0 + throughput: 1564.9452269170579 estimated_peak_memory_range: - min: 20480 - max: 169841240 + min: 24576 + max: 9441728 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1glk8x8p + job_id: j2p0eren5 job_status: Passed torchscript_onnx_ort: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 641.0 + throughput: 1560.0624024960998 estimated_peak_memory_range: - min: 319488 - max: 21949752 + min: 12288 + max: 25595784 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: j7gjly7xp + total_layers: 42 + job_id: j1glek7mp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.500897Z' + timestamp: '2024-06-08T23:05:34Z' - torchscript_onnx_tflite: - inference_time: 342.0 - throughput: 2923.9766081871344 + inference_time: 352.0 + throughput: 2840.909090909091 estimated_peak_memory_range: - min: 12288 - max: 23567296 + min: 16384 + max: 24707232 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jqpyd2x7p + job_id: jegnrerq5 job_status: Passed torchscript_onnx_qnn: inference_time: 475.0 throughput: 2105.2631578947367 estimated_peak_memory_range: min: 163840 - max: 27991680 + max: 28038704 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jw561m70p + job_id: j1p8w7wop job_status: Passed torchscript_onnx_ort: - inference_time: 587.0 - throughput: 1703.5775127768313 + inference_time: 479.0 + throughput: 2087.6826722338205 estimated_peak_memory_range: - min: 618496 - max: 21565152 + min: 12288 + max: 20801936 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: jlpevxz15 + total_layers: 42 + job_id: jw56q1vyg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.500945Z' + timestamp: '2024-06-08T23:05:35Z' - torchscript_onnx_tflite: - inference_time: 438.0 - throughput: 2283.10502283105 + inference_time: 420.0 + throughput: 2380.9523809523807 estimated_peak_memory_range: min: 12288 - max: 1304680 + max: 1230392 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j2p0r9j6p + job_id: jopr1y17g job_status: Passed torchscript_onnx_qnn: - inference_time: 627.0 - throughput: 1594.896331738437 + inference_time: 632.0 + throughput: 1582.2784810126582 estimated_peak_memory_range: - min: 28672 - max: 29355864 + min: 16384 + max: 8848856 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: jwgovwrx5 + job_id: jn5q929op job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.500976Z' + timestamp: '2024-06-08T23:05:33Z' - torchscript_onnx_tflite: - inference_time: 1473.0 - throughput: 678.8866259334691 + inference_time: 1426.0 + throughput: 701.2622720897616 estimated_peak_memory_range: min: 12288 - max: 15538320 + max: 15923968 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: j1p87rxx5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1844.0 - throughput: 542.2993492407809 - estimated_peak_memory_range: - min: 12288 - max: 24215008 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 37 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 37 - job_id: j1pvwmdjg + job_id: jep23m3qg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.501006Z' + timestamp: '2024-06-08T23:05:28Z' - torchscript_onnx_tflite: - inference_time: 7020.0 - throughput: 142.45014245014244 + inference_time: 7066.0 + throughput: 141.52278516841213 estimated_peak_memory_range: - min: 77824 - max: 1892960 + min: 40960 + max: 6406016 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 39 - job_id: jn5q21y45 + job_id: jqpyvdvlp job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.501023Z' + timestamp: '2024-06-08T23:05:29Z' - torchscript_onnx_qnn: - inference_time: 836.0 - throughput: 1196.1722488038276 + inference_time: 742.0 + throughput: 1347.7088948787061 estimated_peak_memory_range: - min: 552960 - max: 552960 + min: 1617920 + max: 1617920 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 37 - job_id: j1p3m79lg + job_id: jogkryrn5 job_status: Passed torchscript_onnx_ort: - inference_time: 721.0 - throughput: 1386.9625520110958 + inference_time: 836.0 + throughput: 1196.1722488038276 estimated_peak_memory_range: - min: 10059776 - max: 10059776 + min: 3690496 + max: 3690496 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 45 + layers_on_npu: 42 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 45 - job_id: jygz7ymkp + total_layers: 42 + job_id: j1p3qm8n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.501055Z' + timestamp: '2024-06-08T23:05:36Z' diff --git a/qai_hub_models/models/resnet50/README.md b/qai_hub_models/models/resnet50/README.md index 6abe8dfa..4ec67961 100644 --- a/qai_hub_models/models/resnet50/README.md +++ b/qai_hub_models/models/resnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) diff --git a/qai_hub_models/models/resnet50/evaluate.py b/qai_hub_models/models/resnet50/evaluate.py new file mode 100644 index 00000000..77f5289f --- /dev/null +++ b/qai_hub_models/models/resnet50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnet50/export.py b/qai_hub_models/models/resnet50/export.py index 01c180e3..688099b7 100644 --- a/qai_hub_models/models/resnet50/export.py +++ b/qai_hub_models/models/resnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnet50/info.yaml b/qai_hub_models/models/resnet50/info.yaml index 7b52d418..4679dc8b 100644 --- a/qai_hub_models/models/resnet50/info.yaml +++ b/qai_hub_models/models/resnet50/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnet50/perf.yaml b/qai_hub_models/models/resnet50/perf.yaml index 8b5c0349..be4d4c11 100644 --- a/qai_hub_models/models/resnet50/perf.yaml +++ b/qai_hub_models/models/resnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2279.0 - throughput: 438.7889425186485 + inference_time: 2269.0 + throughput: 440.72278536800354 estimated_peak_memory_range: min: 16384 - max: 2074480 + max: 2153680 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jmg942zl5 + job_id: j1pvzw4rg job_status: Passed torchscript_onnx_qnn: - inference_time: 2376.0 - throughput: 420.8754208754209 + inference_time: 2387.0 + throughput: 418.93590280687056 estimated_peak_memory_range: min: 622592 - max: 186037320 + max: 175232184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jz5w9zl3p + job_id: jygzv7wxp job_status: Passed torchscript_onnx_ort: - inference_time: 2361.0 - throughput: 423.5493434985176 + inference_time: 2366.0 + throughput: 422.654268808115 estimated_peak_memory_range: - min: 28672 - max: 219004312 + min: 12288 + max: 261165672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz57dnev5 + job_id: jvgd7v0zg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.544373Z' + timestamp: '2024-06-08T23:06:03Z' - torchscript_onnx_tflite: - inference_time: 1634.0 - throughput: 611.9951040391677 + inference_time: 1615.0 + throughput: 619.1950464396285 estimated_peak_memory_range: - min: 16384 - max: 70254992 + min: 12288 + max: 72992224 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jnp181n2g + job_id: j7gjkl1e5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1686.0 - throughput: 593.1198102016607 + inference_time: 1705.0 + throughput: 586.5102639296188 estimated_peak_memory_range: - min: 643072 - max: 53416880 + min: 0 + max: 51115584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jmg942zw5 + job_id: jz5wm9xmg job_status: Passed torchscript_onnx_ort: - inference_time: 1749.0 - throughput: 571.7552887364208 + inference_time: 1750.0 + throughput: 571.4285714285714 estimated_peak_memory_range: min: 618496 - max: 33899296 + max: 34613760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jqp4w4y8g + job_id: jz57vd695 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.544443Z' + timestamp: '2024-06-08T23:06:04Z' - torchscript_onnx_tflite: - inference_time: 2281.0 - throughput: 438.4042086804033 + inference_time: 2272.0 + throughput: 440.14084507042253 estimated_peak_memory_range: - min: 16384 - max: 2328304 + min: 24576 + max: 1714808 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jvgdv4deg + job_id: jlpe4v2v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2384.0 - throughput: 419.46308724832215 + inference_time: 2385.0 + throughput: 419.2872117400419 estimated_peak_memory_range: - min: 634880 - max: 185894144 + min: 622592 + max: 175433648 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jvgdv4drg + job_id: jnp1q837g job_status: Passed 
reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.544487Z' + timestamp: '2024-06-08T23:06:02Z' - torchscript_onnx_qnn: - inference_time: 2759.0 - throughput: 362.4501631025734 + inference_time: 2608.0 + throughput: 383.4355828220859 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jnp181n8g + job_id: jmg99488g job_status: Passed torchscript_onnx_ort: - inference_time: 2346.0 - throughput: 426.25745950554136 + inference_time: 2295.0 + throughput: 435.7298474945534 estimated_peak_memory_range: - min: 84422656 - max: 84422656 + min: 54059008 + max: 54059008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j0px1rl3g + job_id: jqp4jw81p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.544535Z' + timestamp: '2024-06-08T23:06:05Z' diff --git a/qai_hub_models/models/resnext101/README.md b/qai_hub_models/models/resnext101/README.md index dbe49a2a..cf629f9a 100644 --- a/qai_hub_models/models/resnext101/README.md +++ b/qai_hub_models/models/resnext101/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101/evaluate.py b/qai_hub_models/models/resnext101/evaluate.py new file mode 100644 index 00000000..b34ff213 --- /dev/null +++ b/qai_hub_models/models/resnext101/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext101 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext101/export.py b/qai_hub_models/models/resnext101/export.py index a0347299..e4b05a65 100644 --- a/qai_hub_models/models/resnext101/export.py +++ b/qai_hub_models/models/resnext101/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101/info.yaml b/qai_hub_models/models/resnext101/info.yaml index 40e7612f..3587cabd 100644 --- a/qai_hub_models/models/resnext101/info.yaml +++ b/qai_hub_models/models/resnext101/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext101/perf.yaml b/qai_hub_models/models/resnext101/perf.yaml index 751c754f..499cef3a 100644 --- a/qai_hub_models/models/resnext101/perf.yaml +++ b/qai_hub_models/models/resnext101/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNeXt101 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6683.0 - throughput: 149.63339817447255 + inference_time: 6774.0 + throughput: 147.62326542663124 estimated_peak_memory_range: - min: 159744 - max: 2988272 + min: 24576 + max: 2449424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jo5mzkkdp + job_id: j7gjkloe5 job_status: Passed torchscript_onnx_qnn: - inference_time: 6643.0 - throughput: 150.5343971097396 + inference_time: 6930.0 + throughput: 144.3001443001443 estimated_peak_memory_range: min: 16384 - max: 35374192 + max: 36101088 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jep2mddr5 + job_id: jz5wm98mg job_status: Passed torchscript_onnx_ort: - inference_time: 6848.0 - throughput: 146.02803738317758 + inference_time: 6834.0 + throughput: 146.3271875914545 estimated_peak_memory_range: - min: 176128 - max: 448765640 + min: 159744 + max: 453366256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jogky00wp + job_id: jz5wm984g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.693251Z' + timestamp: '2024-06-08T23:07:53Z' - torchscript_onnx_tflite: - inference_time: 4736.0 - throughput: 211.14864864864865 + inference_time: 4859.0 + throughput: 205.80366330520684 estimated_peak_memory_range: min: 20480 - max: 362103440 + max: 364879056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: jegneqqkg + job_id: jlpe4v8v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 4801.0 - throughput: 208.28993959591753 + inference_time: 4800.0 + throughput: 208.33333333333334 estimated_peak_memory_range: min: 0 - max: 124593744 + max: 126702208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: jqpyd228p + job_id: jmg994k8g job_status: Passed torchscript_onnx_ort: - inference_time: 5111.0 - throughput: 195.65642731363727 + inference_time: 5102.0 + throughput: 196.0015680125441 estimated_peak_memory_range: - min: 618496 - max: 92380992 + min: 0 + max: 91577616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: jn5q211n5 + job_id: jmg994kmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.693354Z' + timestamp: '2024-06-08T23:07:54Z' - torchscript_onnx_tflite: - inference_time: 6723.0 - throughput: 148.74312063067083 + inference_time: 6604.0 + throughput: 151.42337976983646 estimated_peak_memory_range: - min: 32768 - max: 2943704 + min: 20480 + max: 3255112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 147 - job_id: joprydd0g + job_id: jygzv78xp job_status: Passed torchscript_onnx_qnn: - inference_time: 6632.0 - throughput: 150.78407720144753 + inference_time: 6571.0 + throughput: 152.18383807639628 estimated_peak_memory_range: - min: 20480 - max: 36799736 + min: 0 + max: 35912680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j1p87rrk5 + job_id: jvgd7v8zg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.693417Z' + timestamp: '2024-06-08T23:07:52Z' - torchscript_onnx_qnn: - inference_time: 9099.0 - throughput: 109.90218705352237 + inference_time: 9160.0 + throughput: 109.17030567685589 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 913408 + max: 913408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 245 - job_id: j2p0r999p + job_id: jnp1q877g job_status: Passed torchscript_onnx_ort: - inference_time: 6742.0 - throughput: 148.3239394838327 + inference_time: 6731.0 + throughput: 148.5663348685188 estimated_peak_memory_range: - min: 109170688 - max: 109170688 + min: 117399552 + max: 117399552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 
+207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 247 - job_id: j1glk88jp + job_id: jnp1q87ng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.693490Z' + timestamp: '2024-06-08T23:07:55Z' diff --git a/qai_hub_models/models/resnext101_quantized/README.md b/qai_hub_models/models/resnext101_quantized/README.md index 756cb042..e91fdd6a 100644 --- a/qai_hub_models/models/resnext101_quantized/README.md +++ b/qai_hub_models/models/resnext101_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt101Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext101_quantized/evaluate.py b/qai_hub_models/models/resnext101_quantized/evaluate.py new file mode 100644 index 00000000..9652d8f6 --- /dev/null +++ b/qai_hub_models/models/resnext101_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext101_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext101_quantized/export.py b/qai_hub_models/models/resnext101_quantized/export.py index ac94e2ad..007bc290 100644 --- a/qai_hub_models/models/resnext101_quantized/export.py +++ b/qai_hub_models/models/resnext101_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext101_quantized/info.yaml b/qai_hub_models/models/resnext101_quantized/info.yaml index 47a2496c..e1431d09 100644 --- a/qai_hub_models/models/resnext101_quantized/info.yaml +++ b/qai_hub_models/models/resnext101_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext101_quantized/perf.yaml b/qai_hub_models/models/resnext101_quantized/perf.yaml index d14cbbc7..1537f3e3 100644 --- a/qai_hub_models/models/resnext101_quantized/perf.yaml +++ b/qai_hub_models/models/resnext101_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNeXt101Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2913.0 - throughput: 343.2887058015791 + inference_time: 2846.0 + throughput: 351.37034434293747 estimated_peak_memory_range: - min: 16384 - max: 2349024 + min: 28672 + max: 2113784 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j1p3m773g + job_id: jqp4jwm2p job_status: Passed torchscript_onnx_qnn: - inference_time: 3066.0 - throughput: 326.1578604044357 + inference_time: 3084.0 + throughput: 324.25421530479895 estimated_peak_memory_range: - min: 12288 - max: 34091696 + min: 16384 + max: 35906456 primary_compute_unit: NPU precision: 
int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jygz7y0op + job_id: jep23m46g job_status: Passed torchscript_onnx_ort: - inference_time: 3564.0 - throughput: 280.58361391694723 + inference_time: 3364.0 + throughput: 297.2651605231867 estimated_peak_memory_range: - min: 8192 - max: 136690536 + min: 12288 + max: 140467400 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jz57dn8v5 + total_layers: 151 + job_id: jogkry9v5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.728144Z' + timestamp: '2024-06-08T23:09:57Z' - torchscript_onnx_tflite: - inference_time: 2099.0 - throughput: 476.41734159123394 + inference_time: 2072.0 + throughput: 482.6254826254826 estimated_peak_memory_range: min: 12288 - max: 256604400 + max: 258677904 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: jwgovwwq5 + job_id: j0pxe1385 job_status: Passed torchscript_onnx_qnn: - inference_time: 2271.0 - throughput: 440.33465433729634 + inference_time: 2331.0 + throughput: 429.000429000429 estimated_peak_memory_range: min: 12288 - max: 115257056 + max: 119524448 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jz5w9zr3p + job_id: jqpyvdq0p job_status: Passed torchscript_onnx_ort: - inference_time: 2622.0 - throughput: 381.38825324180016 + inference_time: 2469.0 + throughput: 405.0222762251924 estimated_peak_memory_range: - min: 618496 - max: 97094352 + min: 12288 + max: 93879712 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: jqp4w428g + total_layers: 151 + job_id: jn5q92mep job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.728227Z' + timestamp: '2024-06-08T23:09:58Z' - torchscript_onnx_tflite: - inference_time: 2843.0 - throughput: 351.74111853675697 + inference_time: 2846.0 + throughput: 351.37034434293747 estimated_peak_memory_range: - min: 28672 - max: 2594968 + min: 16384 + max: 2438744 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j1pvwmnkg + job_id: jo5mvzo75 job_status: Passed torchscript_onnx_qnn: - inference_time: 3075.0 - throughput: 325.2032520325203 + inference_time: 3060.0 + throughput: 326.797385620915 estimated_peak_memory_range: min: 16384 - max: 35433304 + max: 35555384 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jnp181m8g + job_id: j1p8w74qp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.728279Z' + timestamp: '2024-06-08T23:09:56Z' - torchscript_onnx_tflite: - inference_time: 10225.0 - throughput: 97.79951100244499 + inference_time: 10195.0 + throughput: 98.0872976949485 estimated_peak_memory_range: - min: 12288 - max: 
194683600 + min: 53248 + max: 195935712 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 148 - job_id: j7gjly8vp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 10708.0 - throughput: 93.38812103100486 - estimated_peak_memory_range: - min: 0 - max: 124741440 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 146 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 146 - job_id: jvgdv4mrg + job_id: jegnreoj5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.728331Z' + timestamp: '2024-06-08T23:09:51Z' - torchscript_onnx_tflite: - inference_time: 132850.0 - throughput: 7.527286413248024 + inference_time: 131262.0 + throughput: 7.618351084091359 estimated_peak_memory_range: min: 12288 - max: 357543064 + max: 356618752 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 125 layers_on_cpu: 11 total_layers: 148 - job_id: jlpevxno5 + job_id: jopr1yokg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.728360Z' + timestamp: '2024-06-08T23:09:52Z' - torchscript_onnx_qnn: - inference_time: 3362.0 - throughput: 297.441998810232 + inference_time: 3311.0 + throughput: 302.0235578375113 estimated_peak_memory_range: - min: 253952 - max: 253952 + min: 262144 + max: 262144 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 146 - job_id: jmg942qw5 + job_id: j2p0erv05 job_status: Passed torchscript_onnx_ort: - inference_time: 3382.0 - throughput: 295.68302779420463 + inference_time: 3294.0 + throughput: 303.58227079538557 estimated_peak_memory_range: - min: 137187328 - max: 137187328 + min: 12066816 + max: 12066816 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 154 + layers_on_npu: 151 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 154 - job_id: j0px1rz3g + total_layers: 151 + job_id: j1glek12p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.728413Z' + timestamp: '2024-06-08T23:09:59Z' diff --git a/qai_hub_models/models/resnext50/README.md b/qai_hub_models/models/resnext50/README.md index 57bd0206..68e67be5 100644 --- a/qai_hub_models/models/resnext50/README.md +++ b/qai_hub_models/models/resnext50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext50/evaluate.py b/qai_hub_models/models/resnext50/evaluate.py new file mode 100644 index 00000000..50d09632 --- /dev/null +++ b/qai_hub_models/models/resnext50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50/export.py b/qai_hub_models/models/resnext50/export.py index b30842ef..ecbf5998 100644 --- a/qai_hub_models/models/resnext50/export.py +++ b/qai_hub_models/models/resnext50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50/info.yaml b/qai_hub_models/models/resnext50/info.yaml index 9ba350da..1270dfa3 100644 --- a/qai_hub_models/models/resnext50/info.yaml +++ b/qai_hub_models/models/resnext50/info.yaml @@ -39,3 +39,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext50/perf.yaml b/qai_hub_models/models/resnext50/perf.yaml index fe0146fc..6ce25b12 100644 --- a/qai_hub_models/models/resnext50/perf.yaml +++ b/qai_hub_models/models/resnext50/perf.yaml @@ -36,11 +36,11 @@ models: - name: ResNeXt50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2499.0 - throughput: 400.16006402561027 + inference_time: 2511.0 + throughput: 398.24771007566704 estimated_peak_memory_range: - min: 
16384 - max: 2423408 + min: 12288 + max: 2265792 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jegneqwkg + job_id: j1p3qmwm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2534.0 - throughput: 394.6329913180742 + inference_time: 2553.0 + throughput: 391.6960438699569 estimated_peak_memory_range: - min: 12288 - max: 98425656 + min: 57344 + max: 21403728 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jqpyd2y8p + job_id: j7gjklw15 job_status: Passed torchscript_onnx_ort: - inference_time: 2810.0 - throughput: 355.87188612099646 + inference_time: 2768.0 + throughput: 361.271676300578 estimated_peak_memory_range: - min: 36864 - max: 126928704 + min: 16384 + max: 171552072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jn5q21dn5 + job_id: jmg994xmg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.771651Z' + timestamp: '2024-06-08T23:10:26Z' - torchscript_onnx_tflite: - inference_time: 1788.0 - throughput: 559.2841163310962 + inference_time: 1800.0 + throughput: 555.5555555555555 estimated_peak_memory_range: min: 16384 - max: 163487424 + max: 163995360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jopryd70g + job_id: jwgoev41p job_status: Passed torchscript_onnx_qnn: - inference_time: 1875.0 - throughput: 533.3333333333334 + inference_time: 1878.0 + throughput: 532.4813631522896 estimated_peak_memory_range: - min: 618496 - max: 60135536 + min: 0 + max: 60231440 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j2p0r9x9p + job_id: jlpe4vl85 job_status: Passed torchscript_onnx_ort: - inference_time: 1970.0 - throughput: 507.61421319796955 + inference_time: 1929.0 + throughput: 518.4033177812338 estimated_peak_memory_range: min: 618496 - max: 38894176 + max: 41928304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: j1glk8qjp + job_id: jnp1q8vng job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.771721Z' + timestamp: '2024-06-08T23:10:27Z' - torchscript_onnx_tflite: - inference_time: 2508.0 - throughput: 398.72408293460927 + inference_time: 2498.0 + throughput: 400.320256204964 estimated_peak_memory_range: - min: 24576 - max: 2263136 + min: 20480 + max: 2219560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jep2mdzr5 + job_id: j1pvzw9zg job_status: Passed torchscript_onnx_qnn: - inference_time: 2577.0 - throughput: 388.04811796662784 + inference_time: 2553.0 + throughput: 391.6960438699569 estimated_peak_memory_range: - min: 622592 - max: 88271880 + min: 20480 + max: 88251120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jogky0kwp + 
job_id: jz5wm914g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.771766Z' + timestamp: '2024-06-08T23:10:25Z' - torchscript_onnx_qnn: - inference_time: 2906.0 - throughput: 344.1156228492774 + inference_time: 2941.0 + throughput: 340.02040122407345 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 1044480 + max: 1044480 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1p87rkk5 + job_id: jygzv744p job_status: Passed torchscript_onnx_ort: - inference_time: 2653.0 - throughput: 376.9317753486619 + inference_time: 2624.0 + throughput: 381.0975609756098 estimated_peak_memory_range: - min: 65294336 - max: 65294336 + min: 46874624 + max: 46874624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jw561m06p + job_id: jvgd7vz6g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.771815Z' + timestamp: '2024-06-08T23:10:28Z' diff --git a/qai_hub_models/models/resnext50_quantized/README.md b/qai_hub_models/models/resnext50_quantized/README.md index 9950b426..3ce0b330 100644 --- a/qai_hub_models/models/resnext50_quantized/README.md +++ b/qai_hub_models/models/resnext50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of ResNeXt50Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431) diff --git a/qai_hub_models/models/resnext50_quantized/evaluate.py b/qai_hub_models/models/resnext50_quantized/evaluate.py new file mode 100644 index 00000000..1eb23114 --- /dev/null +++ b/qai_hub_models/models/resnext50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.resnext50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/resnext50_quantized/export.py b/qai_hub_models/models/resnext50_quantized/export.py index 42967b4b..73b7aa57 100644 --- a/qai_hub_models/models/resnext50_quantized/export.py +++ b/qai_hub_models/models/resnext50_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/resnext50_quantized/info.yaml b/qai_hub_models/models/resnext50_quantized/info.yaml index 69be8e37..1b2c5291 100644 --- a/qai_hub_models/models/resnext50_quantized/info.yaml +++ b/qai_hub_models/models/resnext50_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/resnext50_quantized/perf.yaml b/qai_hub_models/models/resnext50_quantized/perf.yaml index be21a304..b5dae635 100644 --- a/qai_hub_models/models/resnext50_quantized/perf.yaml +++ b/qai_hub_models/models/resnext50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: ResNeXt50Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 941.0 - throughput: 1062.6992561105208 + inference_time: 943.0 + throughput: 1060.4453870625662 estimated_peak_memory_range: - min: 24576 - max: 1725856 + min: 32768 + max: 1732496 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jwgovw9q5 + job_id: jqp4jw92p job_status: Passed torchscript_onnx_qnn: - inference_time: 1174.0 - throughput: 851.7887563884157 + inference_time: 1179.0 + throughput: 848.1764206955047 estimated_peak_memory_range: - min: 0 - max: 10549448 + min: 20480 + max: 66746984 primary_compute_unit: NPU precision: int8 layer_info: @@ 
-69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jz5w9z03p + job_id: jep23mv6g job_status: Passed torchscript_onnx_ort: - inference_time: 1535.0 - throughput: 651.4657980456026 + inference_time: 1353.0 + throughput: 739.0983000739099 estimated_peak_memory_range: - min: 12288 - max: 88731520 + min: 28672 + max: 79646016 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jmg942785 + total_layers: 83 + job_id: jn5q92vep job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.806370Z' + timestamp: '2024-06-08T23:11:25Z' - torchscript_onnx_tflite: - inference_time: 720.0 - throughput: 1388.888888888889 + inference_time: 710.0 + throughput: 1408.4507042253522 estimated_peak_memory_range: - min: 12288 - max: 100104144 + min: 1523712 + max: 101683104 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j1pvwmykg + job_id: j0pxe1d85 job_status: Passed torchscript_onnx_qnn: - inference_time: 875.0 - throughput: 1142.857142857143 + inference_time: 873.0 + throughput: 1145.475372279496 estimated_peak_memory_range: - min: 167936 - max: 53235392 + min: 163840 + max: 57724624 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jmg9427w5 + job_id: jqpyvd70p job_status: Passed torchscript_onnx_ort: - inference_time: 1140.0 - throughput: 877.1929824561404 + inference_time: 991.0 + throughput: 1009.0817356205853 estimated_peak_memory_range: - min: 618496 - max: 42079072 + min: 28672 + max: 41643216 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jnp181k7g + total_layers: 83 + job_id: j1glekl2p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.806430Z' + timestamp: '2024-06-08T23:11:26Z' - torchscript_onnx_tflite: - inference_time: 939.0 - throughput: 1064.9627263045793 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 12288 - max: 1827928 + max: 2151184 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j7gjly6vp + job_id: jo5mvzd75 job_status: Passed torchscript_onnx_qnn: - inference_time: 1172.0 - throughput: 853.2423208191126 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: - min: 172032 - max: 10872072 + min: 16384 + max: 66707936 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jvgdv4yrg + job_id: jogkry8v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.806470Z' + timestamp: '2024-06-08T23:11:24Z' - torchscript_onnx_tflite: - inference_time: 3073.0 - throughput: 325.4149040026033 + inference_time: 3287.0 + throughput: 304.228780042592 estimated_peak_memory_range: - min: 0 - max: 54569392 + min: 
12288 + max: 55813072 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jlpevx0o5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 3634.0 - throughput: 275.178866263071 - estimated_peak_memory_range: - min: 163840 - max: 53528640 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 78 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 78 - job_id: jz5w9z0mp + job_id: jegnre7j5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.806508Z' + timestamp: '2024-06-08T23:11:19Z' - torchscript_onnx_tflite: - inference_time: 63166.0 - throughput: 15.831301649621633 + inference_time: 64039.0 + throughput: 15.615484314246006 estimated_peak_memory_range: - min: 0 - max: 98697840 + min: 868352 + max: 98172464 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 57 layers_on_cpu: 11 total_layers: 80 - job_id: jygz7yqop + job_id: jopr1ynkg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.806531Z' + timestamp: '2024-06-08T23:11:20Z' - torchscript_onnx_qnn: - inference_time: 1357.0 - throughput: 736.9196757553427 + inference_time: 1350.0 + throughput: 740.7407407407408 estimated_peak_memory_range: - min: 413696 - max: 413696 + min: 1429504 + max: 1429504 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jnp181k8g + job_id: j2p0er605 job_status: Passed torchscript_onnx_ort: - inference_time: 1395.0 - throughput: 716.8458781362007 + inference_time: 1264.0 + throughput: 791.1392405063291 estimated_peak_memory_range: - min: 52191232 - max: 52191232 + min: 24887296 + max: 24887296 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jvgdv4yzg + total_layers: 83 + job_id: jw56q1wng job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.806571Z' + timestamp: '2024-06-08T23:11:27Z' diff --git a/qai_hub_models/models/riffusion_quantized/README.md b/qai_hub_models/models/riffusion_quantized/README.md index 69d0e0ef..9c5b1a50 100644 --- a/qai_hub_models/models/riffusion_quantized/README.md +++ b/qai_hub_models/models/riffusion_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Riffusion can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE) ## References * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) diff --git a/qai_hub_models/models/sam/README.md b/qai_hub_models/models/sam/README.md index e4bc8748..a0ba93db 100644 --- a/qai_hub_models/models/sam/README.md +++ b/qai_hub_models/models/sam/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Segment-Anything-Model can be found [here](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Segment Anything](https://arxiv.org/abs/2304.02643) diff --git a/qai_hub_models/models/sam/export.py b/qai_hub_models/models/sam/export.py index 1492c33b..962a8058 100644 --- a/qai_hub_models/models/sam/export.py +++ b/qai_hub_models/models/sam/export.py @@ -204,7 +204,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -251,6 +251,7 @@ def main(): components=ALL_COMPONENTS, supports_qnn=False, supports_ort=False, + supports_precompiled_ort=False, ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/sam/perf.yaml b/qai_hub_models/models/sam/perf.yaml index 515dc0f5..370ec409 100644 --- a/qai_hub_models/models/sam/perf.yaml +++ b/qai_hub_models/models/sam/perf.yaml @@ -36,19 +36,19 @@ models: - name: SAMDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 48828.0 - throughput: 20.48005242893422 + inference_time: 48230.0 + throughput: 20.733982998133943 estimated_peak_memory_range: - min: 4276224 - max: 12829288 + min: 4026368 + max: 7727688 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: jqpyd2klp + total_layers: 340 + job_id: j7gjklq15 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,21 +57,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.850316Z' + timestamp: '2024-06-08T23:12:47Z' - torchscript_onnx_tflite: - inference_time: 35208.0 - throughput: 28.402635764598955 + inference_time: 34548.0 + throughput: 28.9452356142179 estimated_peak_memory_range: - min: 2613248 - max: 249349152 + min: 12288 + max: 245149360 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: j1p87rdo5 + total_layers: 340 + job_id: jygzv7n4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,21 +80,21 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.850373Z' + 
timestamp: '2024-06-08T23:12:49Z' - torchscript_onnx_tflite: - inference_time: 48966.0 - throughput: 20.422333864314012 + inference_time: 48060.0 + throughput: 20.807324178110694 estimated_peak_memory_range: - min: 12288 - max: 8203928 + min: 4009984 + max: 12530416 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 342 + layers_on_npu: 340 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 342 - job_id: jn5q21xo5 + total_layers: 340 + job_id: jmg994dmg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,23 +103,15 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.850426Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.850433Z' + timestamp: '2024-06-08T23:12:50Z' - name: SAMEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 11078146.0 - throughput: 0.0902678119605934 + inference_time: 12009970.0 + throughput: 0.08326415469813829 estimated_peak_memory_range: - min: 2718310400 - max: 2721580152 + min: 2723000320 + max: 2727292856 primary_compute_unit: CPU precision: fp32 layer_info: @@ -127,7 +119,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: j2p0r98np + job_id: jlpe4vy85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -136,13 +128,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.850531Z' + timestamp: '2024-06-08T23:12:47Z' - torchscript_onnx_tflite: - inference_time: 10431133.0 - throughput: 0.09586686316817167 + inference_time: 9639117.0 + throughput: 0.10374394252087614 estimated_peak_memory_range: - min: 2547875840 - max: 2907987472 + min: 2582843392 + max: 2946188672 primary_compute_unit: CPU precision: fp32 layer_info: @@ -150,7 +142,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: jogky0wnp + job_id: jz5wm944g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -159,13 +151,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.850628Z' + timestamp: '2024-06-08T23:12:49Z' - torchscript_onnx_tflite: - inference_time: 11464171.0 - throughput: 0.08722828715656807 + inference_time: 11285658.0 + throughput: 0.08860803685527242 estimated_peak_memory_range: - min: 2717503488 - max: 2721696880 + min: 2642145280 + max: 2645812336 primary_compute_unit: CPU precision: fp32 layer_info: @@ -173,7 +165,7 @@ models: layers_on_gpu: 37 layers_on_cpu: 771 total_layers: 808 - job_id: j1glk8dmp + job_id: jnp1q86ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -182,12 +174,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.850725Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.850731Z' + timestamp: '2024-06-08T23:12:51Z' diff --git a/qai_hub_models/models/sesr_m5/README.md b/qai_hub_models/models/sesr_m5/README.md index eb36ea36..9cec4f6c 100644 --- a/qai_hub_models/models/sesr_m5/README.md +++ b/qai_hub_models/models/sesr_m5/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
 ## License
 - The license for the original implementation of SESR-M5 can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404)
diff --git a/qai_hub_models/models/sesr_m5/demo.py b/qai_hub_models/models/sesr_m5/demo.py
index 312bbab4..51607bda 100644
--- a/qai_hub_models/models/sesr_m5/demo.py
+++ b/qai_hub_models/models/sesr_m5/demo.py
@@ -3,12 +3,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
 from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo
-from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID, SESR_M5
-from qai_hub_models.utils.asset_loaders import CachedWebModelAsset
-
-IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store(
-    MODEL_ID, MODEL_ASSET_VERSION, "sesr_m5_demo.jpg"
-)
+from qai_hub_models.models.sesr_m5.model import MODEL_ID, SESR_M5


 # Run QuickSRNet end-to-end on a sample image.
@@ -17,7 +12,6 @@ def main(is_test: bool = False):
     super_resolution_demo(
         model_cls=SESR_M5,
         model_id=MODEL_ID,
-        default_image=IMAGE_ADDRESS,
         is_test=is_test,
     )
diff --git a/qai_hub_models/models/sesr_m5/export.py b/qai_hub_models/models/sesr_m5/export.py
index 07e5a42c..de36e9d6 100644
--- a/qai_hub_models/models/sesr_m5/export.py
+++ b/qai_hub_models/models/sesr_m5/export.py
@@ -186,7 +186,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/sesr_m5/info.yaml b/qai_hub_models/models/sesr_m5/info.yaml
index 06f7dae2..f6715c77 100644
--- a/qai_hub_models/models/sesr_m5/info.yaml
+++ b/qai_hub_models/models/sesr_m5/info.yaml
@@ -10,13 +10,14 @@ tags: []
 research_paper: https://arxiv.org/abs/2103.09404
 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution
 license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf
-deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf
+deploy_license:
+  https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf
 source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr
 technical_details:
-  Model checkpoint: sesr_m5_4x_checkpoint_float32
-  Input resolution: 128x128
-  Number of parameters: 343K
-  Model size: 1.32 MB
+  Model checkpoint: sesr_m5_3x_checkpoint
+  Input resolution: 640x360
+  Number of parameters: 338K
+  Model size: 1.30 MB
 applicable_scenarios:
 - Virtual Real Estate Tours
 - Gaming
diff --git a/qai_hub_models/models/sesr_m5/model.py b/qai_hub_models/models/sesr_m5/model.py
index c7283ab9..b8aa863e 100644
--- a/qai_hub_models/models/sesr_m5/model.py
+++ b/qai_hub_models/models/sesr_m5/model.py
@@ -4,81 +4,43 @@
 # ---------------------------------------------------------------------
 from __future__ import annotations

-import torch
+from pathlib import Path

-from qai_hub_models.evaluators.base_evaluators import BaseEvaluator
-from qai_hub_models.evaluators.superres_evaluator import SuperResolutionOutputEvaluator
 from qai_hub_models.models._shared.sesr.common import _load_sesr_source_model
-from qai_hub_models.utils.asset_loaders import CachedWebModelAsset
-from qai_hub_models.utils.base_model import BaseModel
-from qai_hub_models.utils.input_spec import InputSpec
+from qai_hub_models.models._shared.super_resolution.model import (
+    DEFAULT_SCALE_FACTOR,
+    SuperResolutionModel,
+    validate_scale_factor,
+)
+from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch

 MODEL_ID = __name__.split(".")[-2]
 MODEL_ASSET_VERSION = 3
-# Weights and config stored in S3 are sourced from
-# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/sesr/model/model_cards/sesr_m5_2x_w8a8.json
-# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/sesr_m5_2x_checkpoint_float32.pth.tar
-SESR_WEIGHTS = "sesr_m5_4x_checkpoint_float32.pth.tar"
-SCALING_FACTOR = 4
+BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/sesr_m5_{scale_factor}x_checkpoint_float32.pth.tar"
 NUM_CHANNELS = 16
 NUM_LBLOCKS = 5


-class SESR_M5(BaseModel):
-    """Exportable SESR M5 super resolution model, end-to-end."""
-
-    def __init__(
-        self,
-        sesr_model: torch.nn.Module,
-    ) -> None:
-        super().__init__()
-        self.model = sesr_model
+class SESR_M5(SuperResolutionModel):
+    """Exportable SESR_M5 super resolution model, end-to-end."""

     @classmethod
-    def from_pretrained(cls) -> SESR_M5:
+    def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> SESR_M5:
+        validate_scale_factor(scale_factor)
         model = _load_sesr_source_model(
-            SCALING_FACTOR,
+            scale_factor,
             NUM_CHANNELS,
             NUM_LBLOCKS,
         )
-        dst = CachedWebModelAsset.from_asset_store(
-            MODEL_ID, MODEL_ASSET_VERSION, SESR_WEIGHTS
-        ).fetch()
-        checkpoint = torch.load(dst, map_location=torch.device("cpu"))
+        url = BASE_ASSET_URL.format(scale_factor=scale_factor)
+        checkpoint_asset = CachedWebModelAsset(
+            url,
+            MODEL_ID,
+            MODEL_ASSET_VERSION,
+            Path(url).name,
+        )
+        checkpoint = load_torch(checkpoint_asset)
         model.load_state_dict(checkpoint["state_dict"])
         model.eval()
-        return cls(model)
-
-    def get_evaluator(self) -> BaseEvaluator:
-        return SuperResolutionOutputEvaluator()
-
-    def forward(self, image: torch.Tensor) -> torch.Tensor:
-        """
-        Run SESR M5 on `image`, and produce an upscaled image
-
-        Parameters:
-            image: Pixel values pre-processed for model consumption.
-                   Range: float[0, 1]
-                   3-channel Color Space: RGB
-
-        Returns:
-            image: Pixel values
-                   Range: float[0, 1]
-                   3-channel Color Space: RGB
-        """
-
-        return self.model(image)
-
-    @staticmethod
-    def get_input_spec(
-        batch_size: int = 1,
-        num_channels: int = 3,
-        height: int = 128,
-        width: int = 128,
-    ) -> InputSpec:
-        # Get the input specification ordered (name -> (shape, type)) pairs for this model.
-        #
-        # This can be used with the qai_hub python API to declare
-        # the model input specification upon submitting a profile job.
- return {"image": ((batch_size, num_channels, height, width), "float32")} + return cls(model, scale_factor) diff --git a/qai_hub_models/models/sesr_m5/perf.yaml b/qai_hub_models/models/sesr_m5/perf.yaml index 22d99c5a..04e56cbe 100644 --- a/qai_hub_models/models/sesr_m5/perf.yaml +++ b/qai_hub_models/models/sesr_m5/perf.yaml @@ -36,11 +36,11 @@ models: - name: SESR-M5 performance_metrics: - torchscript_onnx_tflite: - inference_time: 2218.0 - throughput: 450.8566275924256 + inference_time: 2201.0 + throughput: 454.3389368468878 estimated_peak_memory_range: - min: 20480 - max: 2072008 + min: 16384 + max: 2206696 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jmg942r85 + job_id: j1gle1ymp job_status: Passed torchscript_onnx_qnn: - inference_time: 2126.0 - throughput: 470.36688617121354 + inference_time: 2133.0 + throughput: 468.8232536333802 estimated_peak_memory_range: - min: 16384 - max: 7778080 + min: 2113536 + max: 6868544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jz57dnm95 + job_id: jwgoe4lkp job_status: Passed torchscript_onnx_ort: - inference_time: 2863.0 - throughput: 349.28396786587496 + inference_time: 2875.0 + throughput: 347.82608695652175 estimated_peak_memory_range: - min: 24576 - max: 5828608 + min: 12288 + max: 6151368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jegneq4qg + job_id: jygzv4lxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.879776Z' + timestamp: '2024-06-11T11:58:43Z' - torchscript_onnx_tflite: inference_time: 1621.0 throughput: 616.9031462060457 estimated_peak_memory_range: min: 16384 - max: 24958624 + max: 25573456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jnp18197g + job_id: jw56qd8yg job_status: Passed torchscript_onnx_qnn: - inference_time: 1468.0 - throughput: 681.1989100817439 + inference_time: 1460.0 + throughput: 684.931506849315 estimated_peak_memory_range: - min: 0 - max: 21931168 + min: 204800 + max: 26892880 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jqp4w471g + job_id: j1pvz9lrg job_status: Passed torchscript_onnx_ort: - inference_time: 1906.0 - throughput: 524.6589716684156 + inference_time: 1954.0 + throughput: 511.77072671443193 estimated_peak_memory_range: - min: 208896 - max: 17848208 + min: 212992 + max: 20764320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: joprydr7g + job_id: jz5wm1lmg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.879821Z' + timestamp: '2024-06-11T11:58:43Z' - torchscript_onnx_tflite: - inference_time: 2280.0 - throughput: 438.5964912280702 + inference_time: 2290.0 + throughput: 436.68122270742356 estimated_peak_memory_range: - min: 24576 - max: 1992472 + min: 28672 + max: 8571536 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 25 - job_id: jvgdv4kzg + job_id: j1p3qwzn5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2143.0 - throughput: 466.63555762949136 + inference_time: 2135.0 + throughput: 468.384074941452 estimated_peak_memory_range: - min: 217088 - max: 4245840 + min: 16384 + max: 9688296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mzk79p + job_id: jlpe4l7v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.879850Z' + timestamp: '2024-06-11T11:58:42Z' - torchscript_onnx_qnn: - inference_time: 2938.0 - throughput: 340.3675970047652 + inference_time: 2971.0 + throughput: 336.58700774150117 estimated_peak_memory_range: - min: 245760 - max: 245760 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j0px1rqlg + job_id: j7gjkwre5 job_status: Passed torchscript_onnx_ort: - inference_time: 2937.0 - throughput: 340.4834865509023 + inference_time: 2911.0 + throughput: 343.52456200618343 estimated_peak_memory_range: - min: 12759040 - max: 12759040 + min: 12976128 + max: 12976128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 33 - job_id: jep2md1q5 + job_id: jmg99xz8g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.879880Z' + timestamp: '2024-06-11T11:58:44Z' diff --git a/qai_hub_models/models/sesr_m5/test.py b/qai_hub_models/models/sesr_m5/test.py index e59f48d4..e203aa30 100644 --- a/qai_hub_models/models/sesr_m5/test.py +++ b/qai_hub_models/models/sesr_m5/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.sesr_m5.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.sesr_m5.demo import main as demo_main from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID, SESR_M5 from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image @@ -23,9 +23,6 @@ def test_task(): model = SESR_M5.from_pretrained() app = SuperResolutionApp(model=model) output_img = app.upscale_image(image)[0] - - output_img.save("/local/mnt/workspace/sesr_m5_output.png") - expected_output_image = load_image(OUTPUT_IMAGE_ADDRESS) assert_most_same( np.asarray(expected_output_image, dtype=np.float32), diff --git a/qai_hub_models/models/sesr_m5_quantized/README.md b/qai_hub_models/models/sesr_m5_quantized/README.md index 57bc9490..f8346830 100644 --- a/qai_hub_models/models/sesr_m5_quantized/README.md +++ b/qai_hub_models/models/sesr_m5_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SESR-M5-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Collapsible Linear Blocks for Super-Efficient Super Resolution](https://arxiv.org/abs/2103.09404) diff --git a/qai_hub_models/models/sesr_m5_quantized/demo.py b/qai_hub_models/models/sesr_m5_quantized/demo.py index 4d063bdd..9c5c43c0 100644 --- a/qai_hub_models/models/sesr_m5_quantized/demo.py +++ b/qai_hub_models/models/sesr_m5_quantized/demo.py @@ -3,26 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.sesr_m5_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - SESR_M5Quantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import TargetRuntime - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "sesr_m5_quantized_demo.jpg" -) +from qai_hub_models.models.sesr_m5_quantized.model import MODEL_ID, SESR_M5Quantizable def main(is_test: bool = False): super_resolution_demo( SESR_M5Quantizable, MODEL_ID, - default_image=IMAGE_ADDRESS, is_test=is_test, - available_target_runtimes=[TargetRuntime.TFLITE], ) diff --git a/qai_hub_models/models/sesr_m5_quantized/export.py b/qai_hub_models/models/sesr_m5_quantized/export.py index 98ccc38e..770f96a9 100644 --- a/qai_hub_models/models/sesr_m5_quantized/export.py +++ b/qai_hub_models/models/sesr_m5_quantized/export.py @@ -30,6 +30,7 @@ can_access_qualcomm_ai_hub, export_without_hub_access, transpose_channel_first_to_last, + transpose_channel_last_to_first, ) from qai_hub_models.utils.qnn_helpers import get_qnn_inputs @@ -124,7 +125,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( - " --force_channel_last_input image" + " --force_channel_last_input image" + " --force_channel_last_output output_0" if target_runtime != TargetRuntime.ORT else "" ) @@ -193,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -212,6 +213,14 @@ def export_model( torch_out = torch_inference(model, sample_inputs) assert inference_job is not None and inference_job.wait().success inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ORT + else transpose_channel_last_to_first( + "output_0", inference_result, target_runtime + ) + ) print_inference_metrics(inference_job, inference_result, torch_out) return (compile_job, profile_job, inference_job) diff --git a/qai_hub_models/models/sesr_m5_quantized/info.yaml b/qai_hub_models/models/sesr_m5_quantized/info.yaml index f62e37c6..1852e48b 100644 --- a/qai_hub_models/models/sesr_m5_quantized/info.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/info.yaml @@ -10,13 +10,14 @@ 
tags: [quantized] research_paper: https://arxiv.org/abs/2103.09404 research_paper_title: Collapsible Linear Blocks for Super-Efficient Super Resolution license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/sesr technical_details: - Model checkpoint: sesr_m5_4x_checkpoint_int8 - Input resolution: 128x128 - Number of parameters: 32.3K - Model size: 45.9 KB + Model checkpoint: sesr_m5_4x_checkpoint + Input resolution: 640x360 + Number of parameters: 338K + Model size: 389 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/sesr_m5_quantized/model.py b/qai_hub_models/models/sesr_m5_quantized/model.py index de5c875e..2d6a806d 100644 --- a/qai_hub_models/models/sesr_m5_quantized/model.py +++ b/qai_hub_models/models/sesr_m5_quantized/model.py @@ -17,34 +17,21 @@ from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim -from qai_hub_models.models._shared.sesr.common import _load_sesr_source_model -from qai_hub_models.models.sesr_m5.model import ( - NUM_CHANNELS, - NUM_LBLOCKS, - SCALING_FACTOR, - SESR_M5, -) -from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR +from qai_hub_models.models.sesr_m5.model import SESR_M5 +from qai_hub_models.utils.aimet.config_loader import get_default_per_tensor_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( constrain_quantized_inputs_to_image_range, ) MODEL_ID = __name__.split(".")[-2] -MODEL_ASSET_VERSION = 3 - -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/sesr/model/model_cards/sesr_m5_4x_w8a8.json: -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_january_artifacts/sesr_m5_4x_checkpoint_int8.pth -# and -# https://raw.githubusercontent.com/quic/aimet/release-aimet-1.23/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.js -# Encodings were generated with AIMET QuantSim library -QUANTIZED_WEIGHTS = "sesr_m5_4x_checkpoint_int8.pth" -AIMET_ENCODINGS = "sesr_m5_quantized_encodings.json" +MODEL_ASSET_VERSION = 5 +DEFAULT_ENCODINGS = "sesr_m5_quantized_encodings.json" class SESR_M5Quantizable(AIMETQuantizableMixin, SESR_M5): - """QuickSRNetLarge with post train quantization support. + """SESR_M5 with post train quantization support. Supports only 8 bit weights and activations, and only loads pre-quantized checkpoints. 
Support for quantizing using your own weights & data will come at a later date.""" @@ -52,38 +39,29 @@ class SESR_M5Quantizable(AIMETQuantizableMixin, SESR_M5): def __init__( self, sesr_model: QuantizationSimModel, + scale_factor: int, ) -> None: - SESR_M5.__init__(self, sesr_model.model) + SESR_M5.__init__(self, sesr_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, sesr_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> SESR_M5Quantizable: # Load Model - sesr = _load_sesr_source_model(SCALING_FACTOR, NUM_CHANNELS, NUM_LBLOCKS) - # The model is collapsed pre-quantization - see - # https://github.com/quic/aimet-model-zoo/blob/d09d2b0404d10f71a7640a87e9d5e5257b028802/aimet_zoo_torch/common/super_resolution/models.py#L110 - sesr.collapse() + sesr = SESR_M5.from_pretrained(scale_factor) input_shape = SESR_M5.get_input_spec()["image"][0] sesr = prepare_model(sesr) equalize_model(sesr, input_shape) - # Download weights and quantization parameters - weights = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, QUANTIZED_WEIGHTS - ).fetch() - - # Load the model weights and quantization parameters - state_dict = torch.load(weights, map_location=torch.device("cpu"))["state_dict"] - sesr.load_state_dict(state_dict) sim = QuantizationSimModel( sesr, quant_scheme="tf_enhanced", default_param_bw=8, default_output_bw=8, - config_file=get_default_aimet_config(), + config_file=get_default_per_tensor_aimet_config(), dummy_input=torch.rand(input_shape), ) constrain_quantized_inputs_to_image_range(sim) @@ -91,10 +69,10 @@ def from_pretrained( if aimet_encodings: if aimet_encodings == "DEFAULT": aimet_encodings = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, AIMET_ENCODINGS + MODEL_ID, MODEL_ASSET_VERSION, DEFAULT_ENCODINGS ).fetch() load_encodings_to_sim(sim, aimet_encodings) sim.model.eval() - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/sesr_m5_quantized/perf.yaml b/qai_hub_models/models/sesr_m5_quantized/perf.yaml index 0140a83e..617858a7 100644 --- a/qai_hub_models/models/sesr_m5_quantized/perf.yaml +++ b/qai_hub_models/models/sesr_m5_quantized/perf.yaml @@ -42,49 +42,49 @@ models: - name: SESR-M5-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1320.0 - throughput: 757.5757575757576 + inference_time: 1220.0 + throughput: 819.672131147541 estimated_peak_memory_range: - min: 32768 - max: 5687536 + min: 24576 + max: 1557800 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: j2p0r9wnp + total_layers: 25 + job_id: jvgd7zdzg job_status: Passed torchscript_onnx_qnn: - inference_time: 774.0 - throughput: 1291.9896640826873 + inference_time: 1050.0 + throughput: 952.3809523809524 estimated_peak_memory_range: - min: 73728 - max: 15287688 + min: 65536 + max: 4040712 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jw561mkyp + total_layers: 26 + job_id: jegnr7zq5 job_status: Passed torchscript_onnx_ort: - inference_time: 1203.0 - throughput: 831.255195344971 + inference_time: 1055.0 + throughput: 947.8672985781991 estimated_peak_memory_range: - min: 2109440 - max: 12179360 + min: 12288 + max: 4410832 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 
layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jlpevxjv5 + total_layers: 29 + job_id: j2p0evmn5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,51 +93,51 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.913920Z' + timestamp: '2024-06-11T11:59:10Z' - torchscript_onnx_tflite: - inference_time: 1063.0 - throughput: 940.7337723424271 + inference_time: 1043.0 + throughput: 958.7727708533077 estimated_peak_memory_range: - min: 12288 - max: 21684464 + min: 16384 + max: 23270336 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: j1p87rno5 + total_layers: 25 + job_id: jz57v7e95 job_status: Passed torchscript_onnx_qnn: - inference_time: 539.0 - throughput: 1855.287569573284 + inference_time: 754.0 + throughput: 1326.2599469496022 estimated_peak_memory_range: - min: 61440 - max: 17221232 + min: 0 + max: 21775952 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1p3m7yng + total_layers: 26 + job_id: jopr1nl7g job_status: Passed torchscript_onnx_ort: - inference_time: 847.0 - throughput: 1180.637544273908 + inference_time: 808.0 + throughput: 1237.6237623762377 estimated_peak_memory_range: - min: 212992 - max: 13365360 + min: 24576 + max: 16135216 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jygz7y1xp + total_layers: 29 + job_id: j1p8w4eop job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,36 +146,36 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.913960Z' + timestamp: '2024-06-11T11:59:11Z' - torchscript_onnx_tflite: - inference_time: 1331.0 - throughput: 751.3148009015778 + inference_time: 1214.0 + throughput: 823.7232289950576 estimated_peak_memory_range: min: 12288 - max: 1824872 + max: 1428272 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: jogky01np + total_layers: 25 + job_id: jqp4j9y1p job_status: Passed torchscript_onnx_qnn: - inference_time: 772.0 - throughput: 1295.3367875647668 + inference_time: 1049.0 + throughput: 953.2888465204957 estimated_peak_memory_range: - min: 0 - max: 24415048 + min: 12288 + max: 80506384 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: j1pvwmjrg + total_layers: 26 + job_id: jqpyv7olp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,36 +184,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.913987Z' + timestamp: '2024-06-11T11:59:09Z' - torchscript_onnx_tflite: - inference_time: 4190.0 - throughput: 238.6634844868735 + inference_time: 3090.0 + throughput: 323.62459546925567 estimated_peak_memory_range: - min: 16384 - max: 15082128 + min: 12288 + max: 16873840 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 11 + layers_on_npu: 22 layers_on_gpu: 0 layers_on_cpu: 3 - total_layers: 14 - job_id: jn5q21no5 - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1811.0 - throughput: 552.1811154058531 - estimated_peak_memory_range: - min: 
65536 - max: 17231504 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 14 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 14 - job_id: j7gjlyjep + total_layers: 25 + job_id: j0pxedll5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,21 +207,21 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.914013Z' + timestamp: '2024-06-11T11:59:04Z' - torchscript_onnx_tflite: - inference_time: 5060.0 - throughput: 197.62845849802372 + inference_time: 16778.0 + throughput: 59.60185957801883 estimated_peak_memory_range: - min: 3223552 - max: 11197400 + min: 249856 + max: 7422256 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 10 + layers_on_npu: 22 layers_on_gpu: 0 - layers_on_cpu: 4 - total_layers: 14 - job_id: j1glk8jmp + layers_on_cpu: 3 + total_layers: 25 + job_id: jo5mvd095 job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,36 +230,36 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.914028Z' + timestamp: '2024-06-11T11:59:05Z' - torchscript_onnx_qnn: - inference_time: 780.0 - throughput: 1282.051282051282 + inference_time: 1198.0 + throughput: 834.7245409015025 estimated_peak_memory_range: - min: 49152 - max: 49152 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 14 + layers_on_npu: 26 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 14 - job_id: jwgovwjk5 + total_layers: 26 + job_id: jep23vrqg job_status: Passed torchscript_onnx_ort: - inference_time: 1138.0 - throughput: 878.7346221441124 + inference_time: 1092.0 + throughput: 915.7509157509157 estimated_peak_memory_range: - min: 9015296 - max: 9015296 + min: 5398528 + max: 5398528 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 19 + layers_on_npu: 29 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 19 - job_id: jz5w9zjmp + total_layers: 29 + job_id: jogkr92n5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.914055Z' + timestamp: '2024-06-11T11:59:12Z' diff --git a/qai_hub_models/models/sesr_m5_quantized/test.py b/qai_hub_models/models/sesr_m5_quantized/test.py index 0ed36c55..0ac444df 100644 --- a/qai_hub_models/models/sesr_m5_quantized/test.py +++ b/qai_hub_models/models/sesr_m5_quantized/test.py @@ -10,13 +10,10 @@ import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.sesr_m5_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.sesr_m5.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.sesr_m5_quantized.demo import main as demo_main -from qai_hub_models.models.sesr_m5_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - SESR_M5Quantizable, -) +from qai_hub_models.models.sesr_m5_quantized.model import SESR_M5Quantizable from qai_hub_models.utils.asset_loaders import ( CachedWebModelAsset, load_image, @@ -24,7 +21,7 @@ ) from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check -OUTPUT_IMAGE_LOCAL_PATH = "sesr_m5_quantized_demo_output.png" +OUTPUT_IMAGE_LOCAL_PATH = "sesr_m5_demo_output.png" OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH ) diff --git 
a/qai_hub_models/models/shufflenet_v2/README.md b/qai_hub_models/models/shufflenet_v2/README.md index 97694e8a..6fcab0d3 100644 --- a/qai_hub_models/models/shufflenet_v2/README.md +++ b/qai_hub_models/models/shufflenet_v2/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2/evaluate.py b/qai_hub_models/models/shufflenet_v2/evaluate.py new file mode 100644 index 00000000..3952a613 --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.shufflenet_v2 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/shufflenet_v2/export.py b/qai_hub_models/models/shufflenet_v2/export.py index 74e70e03..a3bfc586 100644 --- a/qai_hub_models/models/shufflenet_v2/export.py +++ b/qai_hub_models/models/shufflenet_v2/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2/info.yaml b/qai_hub_models/models/shufflenet_v2/info.yaml index 9663eb73..4610476e 100644 --- a/qai_hub_models/models/shufflenet_v2/info.yaml +++ b/qai_hub_models/models/shufflenet_v2/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - 
imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/shufflenet_v2/perf.yaml b/qai_hub_models/models/shufflenet_v2/perf.yaml index 2185322d..0d002af4 100644 --- a/qai_hub_models/models/shufflenet_v2/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2/perf.yaml @@ -36,11 +36,11 @@ models: - name: Shufflenet-v2 performance_metrics: - torchscript_onnx_tflite: - inference_time: 1228.0 - throughput: 814.3322475570033 + inference_time: 1229.0 + throughput: 813.6696501220505 estimated_peak_memory_range: - min: 24576 - max: 2182672 + min: 49152 + max: 1892400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jnp181r7g + job_id: jqpyvdx0p job_status: Passed torchscript_onnx_qnn: - inference_time: 779.0 - throughput: 1283.6970474967907 + inference_time: 767.0 + throughput: 1303.7809647979138 estimated_peak_memory_range: - min: 507904 - max: 18379728 + min: 12288 + max: 127973560 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jqp4w4z1g + job_id: jogkry2v5 job_status: Passed torchscript_onnx_ort: - inference_time: 1097.0 - throughput: 911.5770282588878 + inference_time: 1093.0 + throughput: 914.9130832570905 estimated_peak_memory_range: min: 0 - max: 10126672 + max: 4739736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: joprydz7g + job_id: j1p3qmzm5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.957399Z' + timestamp: '2024-06-08T23:14:43Z' - torchscript_onnx_tflite: - inference_time: 815.0 - throughput: 1226.993865030675 + inference_time: 816.0 + throughput: 1225.4901960784314 estimated_peak_memory_range: - min: 16384 - max: 33495808 + min: 12288 + max: 34358736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jvgdv4jzg + job_id: j2p0erm05 job_status: Passed torchscript_onnx_qnn: - inference_time: 517.0 - throughput: 1934.2359767891683 + inference_time: 519.0 + throughput: 1926.7822736030828 estimated_peak_memory_range: min: 12288 - max: 54005488 + max: 59916624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: j0px1rwlg + job_id: jn5q92lep job_status: Passed torchscript_onnx_ort: - inference_time: 754.0 - throughput: 1326.2599469496022 + inference_time: 762.0 + throughput: 1312.3359580052493 estimated_peak_memory_range: min: 12288 - max: 23340336 + max: 24863536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jep2md2q5 + job_id: jwgoevl1p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.957489Z' + timestamp: '2024-06-08T23:14:44Z' - torchscript_onnx_tflite: - inference_time: 1229.0 - throughput: 813.6696501220505 + inference_time: 1223.0 + throughput: 817.6614881439084 estimated_peak_memory_range: min: 28672 - max: 1971648 + max: 1440712 primary_compute_unit: NPU precision: fp16 layer_info: @@ 
-154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 204 - job_id: jz57dnq95 + job_id: j1p8w7eqp job_status: Passed torchscript_onnx_qnn: - inference_time: 768.0 - throughput: 1302.0833333333333 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 618496 - max: 17970352 + min: 622592 + max: 138856072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jegneqjqg + job_id: jw56q18ng job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.957545Z' + timestamp: '2024-06-08T23:14:42Z' - torchscript_onnx_qnn: - inference_time: 950.0 - throughput: 1052.6315789473683 + inference_time: 1095.0 + throughput: 913.2420091324201 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 158 - job_id: jo5mzkj9p + job_id: j1gleky2p job_status: Passed torchscript_onnx_ort: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1103.0 + throughput: 906.6183136899365 estimated_peak_memory_range: - min: 8019968 - max: 8019968 + min: 5971968 + max: 5971968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 223 - job_id: jqpyd29lp + job_id: j1pvzwlzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.957606Z' + timestamp: '2024-06-08T23:14:45Z' diff --git a/qai_hub_models/models/shufflenet_v2_quantized/README.md b/qai_hub_models/models/shufflenet_v2_quantized/README.md index 32b3c576..f2663608 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/README.md +++ b/qai_hub_models/models/shufflenet_v2_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Shufflenet-v2Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py b/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py new file mode 100644 index 00000000..2fb7d9af --- /dev/null +++ b/qai_hub_models/models/shufflenet_v2_quantized/evaluate.py @@ -0,0 +1,63 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.shufflenet_v2_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/shufflenet_v2_quantized/export.py b/qai_hub_models/models/shufflenet_v2_quantized/export.py index 4692513f..437ca9e5 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/export.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml index afd1d7e6..25dde27d 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/info.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/shufflenet_v2_quantized/model.py b/qai_hub_models/models/shufflenet_v2_quantized/model.py index afc7535d..a3d7540d 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/model.py +++ b/qai_hub_models/models/shufflenet_v2_quantized/model.py @@ -26,7 +26,7 @@ from qai_hub_models.utils.asset_loaders import CachedWebModelAsset from qai_hub_models.utils.quantization_aimet import ( convert_all_depthwise_to_per_tensor, - tie_aimet_observer_groups, + tie_observers, ) MODEL_ID = __name__.split(".")[-2] @@ -82,7 +82,7 @@ def from_pretrained( dummy_input=dummy_input, ) convert_all_depthwise_to_per_tensor(sim.model) - cls._tie_pre_concat_quantizers(sim) + tie_observers(sim) constrain_quantized_inputs_to_image_range(sim) if aimet_encodings: @@ -94,57 +94,3 @@ def from_pretrained( sim.model.eval() return cls(sim) - - @classmethod - def _tie_pre_concat_quantizers(cls, sim: QuantizationSimModel): - """ - This ties together the output 
quantizers prior to concatenations. This - prevents unnecessary re-quantization during the concatenation. - """ - n = sim.model.net - # Because of skip connections, the groups are large - groups = [ - [ - getattr(getattr(n.stage2, "0").branch1, "4"), - getattr(getattr(n.stage2, "0").branch2, "7"), - getattr(n.stage2, "0").module_cat, - getattr(getattr(n.stage2, "1").branch2, "7"), - getattr(n.stage2, "1").module_cat_1, - getattr(getattr(n.stage2, "2").branch2, "7"), - getattr(n.stage2, "2").module_cat_2, - getattr(getattr(n.stage2, "3").branch2, "7"), - getattr(n.stage2, "3").module_cat_3, - ], - [ - getattr(getattr(n.stage3, "0").branch1, "4"), - getattr(getattr(n.stage3, "0").branch2, "7"), - getattr(n.stage3, "0").module_cat_4, - getattr(getattr(n.stage3, "1").branch2, "7"), - getattr(n.stage3, "1").module_cat_5, - getattr(getattr(n.stage3, "2").branch2, "7"), - getattr(n.stage3, "2").module_cat_6, - getattr(getattr(n.stage3, "3").branch2, "7"), - getattr(n.stage3, "3").module_cat_7, - getattr(getattr(n.stage3, "4").branch2, "7"), - getattr(n.stage3, "4").module_cat_8, - getattr(getattr(n.stage3, "5").branch2, "7"), - getattr(n.stage3, "5").module_cat_9, - getattr(getattr(n.stage3, "6").branch2, "7"), - getattr(n.stage3, "6").module_cat_10, - getattr(getattr(n.stage3, "7").branch2, "7"), - getattr(n.stage3, "7").module_cat_11, - ], - [ - getattr(getattr(n.stage4, "0").branch1, "4"), - getattr(getattr(n.stage4, "0").branch2, "7"), - getattr(n.stage4, "0").module_cat_12, - getattr(getattr(n.stage4, "1").branch2, "7"), - getattr(n.stage4, "1").module_cat_13, - getattr(getattr(n.stage4, "2").branch2, "7"), - getattr(n.stage4, "2").module_cat_14, - getattr(getattr(n.stage4, "3").branch2, "7"), - getattr(n.stage4, "3").module_cat_15, - ], - ] - - tie_aimet_observer_groups(groups) diff --git a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml index 0b0a30d7..342366ce 100644 --- a/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml +++ b/qai_hub_models/models/shufflenet_v2_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Shufflenet-v2Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 635.0 - throughput: 1574.8031496062993 + inference_time: 629.0 + throughput: 1589.825119236884 estimated_peak_memory_range: - min: 12288 - max: 1986880 + min: 16384 + max: 1932240 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: j1p87rlo5 + job_id: jlpe4v785 job_status: Passed torchscript_onnx_qnn: - inference_time: 588.0 - throughput: 1700.6802721088436 + inference_time: 582.0 + throughput: 1718.213058419244 estimated_peak_memory_range: - min: 184320 - max: 3281704 + min: 16384 + max: 102592048 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,7 +69,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j1p3m7kng + job_id: jvgd7vd6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -78,13 +78,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:42.991988Z' + timestamp: '2024-06-08T23:15:22Z' - torchscript_onnx_tflite: - inference_time: 455.0 - throughput: 2197.802197802198 + inference_time: 459.0 + throughput: 2178.649237472767 estimated_peak_memory_range: min: 12288 - max: 22965888 + max: 23307232 primary_compute_unit: NPU precision: int8 layer_info: @@ -92,14 +92,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 
total_layers: 205 - job_id: jogky0jnp + job_id: jygzv7l4p job_status: Passed torchscript_onnx_qnn: - inference_time: 420.0 - throughput: 2380.9523809523807 + inference_time: 418.0 + throughput: 2392.3444976076553 estimated_peak_memory_range: - min: 0 - max: 46946848 + min: 163840 + max: 50012432 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,7 +107,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: jwgovwyk5 + job_id: jz5wm9lzg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -116,13 +116,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:42.992051Z' + timestamp: '2024-06-08T23:15:23Z' - torchscript_onnx_tflite: - inference_time: 623.0 - throughput: 1605.1364365971108 + inference_time: 649.0 + throughput: 1540.8320493066255 estimated_peak_memory_range: - min: 16384 - max: 1963320 + min: 12288 + max: 1631760 primary_compute_unit: NPU precision: int8 layer_info: @@ -130,14 +130,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: jn5q21jo5 + job_id: jz5wm9l4g job_status: Passed torchscript_onnx_qnn: - inference_time: 586.0 - throughput: 1706.4846416382252 + inference_time: 585.0 + throughput: 1709.4017094017095 estimated_peak_memory_range: - min: 167936 - max: 23791000 + min: 163840 + max: 77147648 primary_compute_unit: NPU precision: int8 layer_info: @@ -145,7 +145,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j7gjlyxep + job_id: jnp1q8nkg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -154,13 +154,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:42.992107Z' + timestamp: '2024-06-08T23:15:26Z' - torchscript_onnx_tflite: - inference_time: 972.0 - throughput: 1028.80658436214 + inference_time: 944.0 + throughput: 1059.322033898305 estimated_peak_memory_range: min: 12288 - max: 16548544 + max: 17202032 primary_compute_unit: NPU precision: int8 layer_info: @@ -168,22 +168,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 205 - job_id: j1glk8nmp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1101.0 - throughput: 908.2652134423251 - estimated_peak_memory_range: - min: 163840 - max: 43822880 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 122 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 122 - job_id: jlpevx9v5 + job_id: jmg994zmg job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -192,13 +177,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:42.992163Z' + timestamp: '2024-06-08T23:15:20Z' - torchscript_onnx_tflite: - inference_time: 10657.0 - throughput: 93.83503800319039 + inference_time: 8510.0 + throughput: 117.50881316098707 estimated_peak_memory_range: - min: 147456 - max: 5412728 + min: 65536 + max: 5500048 primary_compute_unit: CPU precision: fp32 layer_info: @@ -206,7 +191,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 153 total_layers: 205 - job_id: jw561m6yp + job_id: jnp1q8nng job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -215,13 +200,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:42.992198Z' + timestamp: '2024-06-08T23:15:21Z' - torchscript_onnx_qnn: - inference_time: 709.0 - throughput: 1410.4372355430182 + inference_time: 694.0 + throughput: 1440.922190201729 estimated_peak_memory_range: - min: 589824 - max: 589824 + min: 618496 + max: 
618496 primary_compute_unit: NPU precision: int8 layer_info: @@ -229,7 +214,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 122 - job_id: j1pvwm3rg + job_id: jmg994zqg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -238,4 +223,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:42.992226Z' + timestamp: '2024-06-08T23:15:24Z' diff --git a/qai_hub_models/models/sinet/README.md b/qai_hub_models/models/sinet/README.md index 601b6d46..82d1c945 100644 --- a/qai_hub_models/models/sinet/README.md +++ b/qai_hub_models/models/sinet/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SINet can be found [here](https://github.com/clovaai/ext_portrait_segmentation/blob/master/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SINet: Extreme Lightweight Portrait Segmentation Networks with Spatial Squeeze Modules and Information Blocking Decoder](https://arxiv.org/abs/1911.09099) diff --git a/qai_hub_models/models/sinet/export.py b/qai_hub_models/models/sinet/export.py index dcfe796d..495dde9f 100644 --- a/qai_hub_models/models/sinet/export.py +++ b/qai_hub_models/models/sinet/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/sinet/perf.yaml b/qai_hub_models/models/sinet/perf.yaml index 8a9f923a..79d10a22 100644 --- a/qai_hub_models/models/sinet/perf.yaml +++ b/qai_hub_models/models/sinet/perf.yaml @@ -36,11 +36,11 @@ models: - name: SINet performance_metrics: - torchscript_onnx_tflite: - inference_time: 1838.0 - throughput: 544.069640914037 + inference_time: 1808.0 + throughput: 553.0973451327434 estimated_peak_memory_range: min: 16384 - max: 2227656 + max: 1874832 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jvgdv4wzg + job_id: jo5mvz0y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1175.0 - throughput: 851.063829787234 + inference_time: 1170.0 + throughput: 854.7008547008547 estimated_peak_memory_range: min: 16384 - max: 6837024 + max: 4437520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: j0px1rvlg + job_id: jep23moxg job_status: Passed torchscript_onnx_ort: - inference_time: 2272.0 - throughput: 440.14084507042253 + inference_time: 2242.0 + throughput: 446.03033006244425 estimated_peak_memory_range: - min: 2109440 - max: 8476840 + min: 233472 + max: 61135024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jep2md8q5 + job_id: jogkry6y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 
Gen 2 - timestamp: '2024-05-29T18:59:43.027778Z' + timestamp: '2024-06-08T23:15:55Z' - torchscript_onnx_tflite: - inference_time: 1189.0 - throughput: 841.0428931875525 + inference_time: 1188.0 + throughput: 841.7508417508418 estimated_peak_memory_range: min: 12288 - max: 26086320 + max: 27213536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jz57dnz95 + job_id: jegnre1v5 job_status: Passed torchscript_onnx_qnn: - inference_time: 783.0 - throughput: 1277.139208173691 + inference_time: 784.0 + throughput: 1275.5102040816328 estimated_peak_memory_range: - min: 12288 - max: 68156960 + min: 0 + max: 67399104 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jo5mzkr9p + job_id: jqpyvd8rp job_status: Passed torchscript_onnx_ort: - inference_time: 1540.0 - throughput: 649.3506493506494 + inference_time: 1564.0 + throughput: 639.386189258312 estimated_peak_memory_range: - min: 544768 - max: 27802000 + min: 12288 + max: 25637744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: jqpyd2elp + job_id: jn5q9247p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.027880Z' + timestamp: '2024-06-08T23:15:55Z' - torchscript_onnx_tflite: - inference_time: 1802.0 - throughput: 554.9389567147614 + inference_time: 1809.0 + throughput: 552.791597567717 estimated_peak_memory_range: - min: 28672 - max: 1897304 + min: 12288 + max: 1931632 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 240 - job_id: jqp4w4q1g + job_id: jopr1yxvg job_status: Passed torchscript_onnx_qnn: - inference_time: 1174.0 - throughput: 851.7887563884157 + inference_time: 1183.0 + throughput: 845.30853761623 estimated_peak_memory_range: - min: 16384 - max: 19845704 + min: 622592 + max: 10230296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: joprydk7g + job_id: j1p8w7jzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.027978Z' + timestamp: '2024-06-08T23:15:54Z' - torchscript_onnx_qnn: - inference_time: 1410.0 - throughput: 709.2198581560284 + inference_time: 1353.0 + throughput: 739.0983000739099 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 186 - job_id: jegneq2qg + job_id: j2p0ero25 job_status: Passed torchscript_onnx_ort: - inference_time: 2312.0 - throughput: 432.52595155709344 + inference_time: 2343.0 + throughput: 426.8032437046522 estimated_peak_memory_range: - min: 3145728 - max: 3145728 + min: 6090752 + max: 6090752 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 229 - job_id: j2p0r9ynp + job_id: j1glekwep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.028046Z' + timestamp: 
'2024-06-08T23:15:56Z' diff --git a/qai_hub_models/models/squeezenet1_1/README.md b/qai_hub_models/models/squeezenet1_1/README.md index 879ef789..48b5f7ed 100644 --- a/qai_hub_models/models/squeezenet1_1/README.md +++ b/qai_hub_models/models/squeezenet1_1/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1/evaluate.py b/qai_hub_models/models/squeezenet1_1/evaluate.py new file mode 100644 index 00000000..eb509750 --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.squeezenet1_1 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/squeezenet1_1/export.py b/qai_hub_models/models/squeezenet1_1/export.py index 5185e0bc..361dd2cc 100644 --- a/qai_hub_models/models/squeezenet1_1/export.py +++ b/qai_hub_models/models/squeezenet1_1/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/squeezenet1_1/info.yaml b/qai_hub_models/models/squeezenet1_1/info.yaml index dba5e172..9cb093e0 100644 --- a/qai_hub_models/models/squeezenet1_1/info.yaml +++ b/qai_hub_models/models/squeezenet1_1/info.yaml @@ -40,3 +40,4 @@ 
deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/squeezenet1_1/perf.yaml b/qai_hub_models/models/squeezenet1_1/perf.yaml index 438ef1ef..e1645dcb 100644 --- a/qai_hub_models/models/squeezenet1_1/perf.yaml +++ b/qai_hub_models/models/squeezenet1_1/perf.yaml @@ -36,11 +36,11 @@ models: - name: SqueezeNet-1_1 performance_metrics: - torchscript_onnx_tflite: - inference_time: 660.0 - throughput: 1515.1515151515152 + inference_time: 646.0 + throughput: 1547.9876160990711 estimated_peak_memory_range: - min: 28672 - max: 5545600 + min: 20480 + max: 1582896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jogky0znp + job_id: j1p3qmox5 job_status: Passed torchscript_onnx_qnn: - inference_time: 705.0 - throughput: 1418.4397163120568 + inference_time: 702.0 + throughput: 1424.5014245014245 estimated_peak_memory_range: min: 16384 - max: 6892784 + max: 7170920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jw561mjyp + job_id: j7gjkl375 job_status: Passed torchscript_onnx_ort: - inference_time: 666.0 - throughput: 1501.5015015015015 + inference_time: 671.0 + throughput: 1490.312965722802 estimated_peak_memory_range: min: 12288 - max: 7528472 + max: 11919448 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: j7gjlymep + job_id: jmg994oqg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.062080Z' + timestamp: '2024-06-08T23:16:21Z' - torchscript_onnx_tflite: - inference_time: 462.0 - throughput: 2164.5021645021643 + inference_time: 452.0 + throughput: 2212.3893805309735 estimated_peak_memory_range: - min: 24576 - max: 22715072 + min: 18399232 + max: 41710416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jn5q218o5 + job_id: jwgoevd4p job_status: Passed torchscript_onnx_qnn: - inference_time: 491.0 - throughput: 2036.6598778004072 + inference_time: 492.0 + throughput: 2032.520325203252 estimated_peak_memory_range: - min: 618496 - max: 31784512 + min: 0 + max: 32687824 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1p3m73ng + job_id: jlpe4v675 job_status: Passed torchscript_onnx_ort: - inference_time: 485.0 - throughput: 2061.855670103093 + inference_time: 477.0 + throughput: 2096.4360587002097 estimated_peak_memory_range: min: 12288 - max: 17557776 + max: 19637184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jlpevx1v5 + job_id: jnp1q8okg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.062134Z' + timestamp: '2024-06-08T23:16:22Z' - torchscript_onnx_tflite: - inference_time: 655.0 - throughput: 1526.7175572519084 + inference_time: 664.0 + throughput: 1506.0240963855422 estimated_peak_memory_range: - min: 24576 - max: 1501432 + min: 20480 + max: 1398696 
primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: j1glk8zmp + job_id: j1pvzw27g job_status: Passed torchscript_onnx_qnn: - inference_time: 705.0 - throughput: 1418.4397163120568 + inference_time: 698.0 + throughput: 1432.6647564469913 estimated_peak_memory_range: - min: 622592 - max: 6571960 + min: 634880 + max: 7259784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: j1pvwmorg + job_id: jz5wm9yzg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.062167Z' + timestamp: '2024-06-08T23:16:20Z' - torchscript_onnx_qnn: - inference_time: 825.0 - throughput: 1212.121212121212 + inference_time: 801.0 + throughput: 1248.4394506866417 estimated_peak_memory_range: - min: 602112 - max: 602112 + min: 606208 + max: 606208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 70 - job_id: jwgovw0k5 + job_id: jygzv7zzp job_status: Passed torchscript_onnx_ort: - inference_time: 670.0 - throughput: 1492.5373134328358 + inference_time: 681.0 + throughput: 1468.4287812041116 estimated_peak_memory_range: - min: 2846720 - max: 2846720 + min: 3670016 + max: 3670016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 71 - job_id: jygz7y9xp + job_id: jvgd7v6kg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.062205Z' + timestamp: '2024-06-08T23:16:23Z' diff --git a/qai_hub_models/models/squeezenet1_1_quantized/README.md b/qai_hub_models/models/squeezenet1_1_quantized/README.md index 77ea9cee..e7e60338 100644 --- a/qai_hub_models/models/squeezenet1_1_quantized/README.md +++ b/qai_hub_models/models/squeezenet1_1_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of SqueezeNet-1_1Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size](https://arxiv.org/abs/1602.07360) diff --git a/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py b/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py new file mode 100644 index 00000000..bdaf6536 --- /dev/null +++ b/qai_hub_models/models/squeezenet1_1_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+
+
+from __future__ import annotations
+
+import warnings
+
+import qai_hub as hub
+
+from qai_hub_models.models.squeezenet1_1_quantized import MODEL_ID, Model
+from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs
+from qai_hub_models.utils.base_model import BaseModel
+from qai_hub_models.utils.evaluate import evaluate_on_dataset
+from qai_hub_models.utils.inference import compile_model_from_args
+from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin
+
+SUPPORTED_DATASETS = ["imagenette", "imagenet"]
+
+
+def main():
+    warnings.filterwarnings("ignore")
+    parser = evaluate_parser(
+        model_cls=Model,
+        default_split_size=2500,
+        supported_datasets=SUPPORTED_DATASETS,
+    )
+    args = parser.parse_args()
+    args.device = None
+
+    if args.hub_model_id is not None:
+        hub_model = hub.get_model(args.hub_model_id)
+    else:
+        hub_model = compile_model_from_args(
+            MODEL_ID, args, get_model_kwargs(Model, vars(args))
+        )
+    hub_device = get_hub_device(None, args.chipset)
+
+    # Use Fp16 model for torch inference
+    for cls in Model.__mro__:
+        if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin):
+            torch_cls = cls
+            break
+    torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args)))
+    evaluate_on_dataset(
+        hub_model,
+        torch_model,
+        hub_device,
+        args.dataset_name,
+        args.split_size,
+        args.num_samples,
+        args.seed,
+        args.profile_options,
+        args.use_cache,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/export.py b/qai_hub_models/models/squeezenet1_1_quantized/export.py
index ba6e7087..f68f076e 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/export.py
+++ b/qai_hub_models/models/squeezenet1_1_quantized/export.py
@@ -196,7 +196,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
index 8daf5c4d..6675eada 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
+++ b/qai_hub_models/models/squeezenet1_1_quantized/info.yaml
@@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License
 dataset:
 - imagenet-1k
 - imagenet-22k
+labels_file: imagenet_labels.txt
diff --git a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
index 19b393c3..1d3b45b2 100644
--- a/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
+++ b/qai_hub_models/models/squeezenet1_1_quantized/perf.yaml
@@ -42,11 +42,11 @@ models:
 - name: SqueezeNet-1_1Quantized
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 218.0
-      throughput: 4587.155963302752
+      inference_time: 220.0
+      throughput: 4545.454545454545
       estimated_peak_memory_range:
-        min: 12288
-        max: 3523192
+        min: 16384
+        max: 1715824
       primary_compute_unit: NPU
       precision: int8
       layer_info:
@@ -54,14 +54,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 41
-      job_id: jmg942185
+      job_id: jqp4jweqp
       job_status: Passed
     torchscript_onnx_qnn:
-      inference_time: 470.0
-      throughput: 2127.659574468085
+      inference_time: 467.0
+      throughput: 2141.3276231263385
       estimated_peak_memory_range:
-        min: 20480
-        max: 9844736
+        min: 167936
+        max: 10118072
primary_compute_unit: NPU precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jnp181lng + job_id: jep23mdxg job_status: Passed torchscript_onnx_ort: - inference_time: 563.0 - throughput: 1776.1989342806394 + inference_time: 450.0 + throughput: 2222.222222222222 estimated_peak_memory_range: - min: 618496 - max: 7541392 + min: 12288 + max: 5507096 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: jo5mzk27p + total_layers: 47 + job_id: jogkry0y5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.096271Z' + timestamp: '2024-06-08T23:16:52Z' - torchscript_onnx_tflite: - inference_time: 180.0 - throughput: 5555.555555555556 + inference_time: 179.0 + throughput: 5586.592178770949 estimated_peak_memory_range: min: 12288 - max: 22121184 + max: 22450960 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jnp181l7g + job_id: j0pxe10j5 job_status: Passed torchscript_onnx_qnn: inference_time: 342.0 throughput: 2923.9766081871344 estimated_peak_memory_range: - min: 163840 - max: 28352000 + min: 12288 + max: 27530432 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jvgdv496g + job_id: jqpyvd2rp job_status: Passed torchscript_onnx_ort: - inference_time: 444.0 - throughput: 2252.252252252252 + inference_time: 372.0 + throughput: 2688.1720430107525 estimated_peak_memory_range: min: 12288 - max: 14450080 + max: 15334176 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: jegneqyjg + total_layers: 47 + job_id: jn5q9217p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.096323Z' + timestamp: '2024-06-08T23:16:53Z' - torchscript_onnx_tflite: - inference_time: 221.0 - throughput: 4524.886877828054 + inference_time: 223.0 + throughput: 4484.304932735426 estimated_peak_memory_range: - min: 12288 - max: 2010568 + min: 20480 + max: 1471296 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jvgdv49zg + job_id: jo5mvz9y5 job_status: Passed torchscript_onnx_qnn: - inference_time: 461.0 - throughput: 2169.1973969631235 + inference_time: 464.0 + throughput: 2155.1724137931033 estimated_peak_memory_range: - min: 172032 - max: 42070744 + min: 28672 + max: 17992504 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jqp4w4o2g + job_id: j1p8w7rzp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.096354Z' + timestamp: '2024-06-08T23:16:51Z' - torchscript_onnx_tflite: - inference_time: 533.0 - throughput: 1876.172607879925 + inference_time: 526.0 + throughput: 1901.1406844106464 estimated_peak_memory_range: min: 12288 - max: 15187344 + max: 14752288 
primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jz5w9zv4p - job_status: Passed - torchscript_onnx_qnn: - inference_time: 961.0 - throughput: 1040.5827263267429 - estimated_peak_memory_range: - min: 12288 - max: 23287696 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 45 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 45 - job_id: j0px1rj8g + job_id: jegnreqv5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.096386Z' + timestamp: '2024-06-08T23:16:46Z' - torchscript_onnx_tflite: - inference_time: 4102.0 - throughput: 243.78352023403218 + inference_time: 4092.0 + throughput: 244.37927663734115 estimated_peak_memory_range: - min: 16384 - max: 7045232 + min: 20480 + max: 7234128 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 41 - job_id: jmg9421m5 + job_id: jopr1ydvg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.096403Z' + timestamp: '2024-06-08T23:16:47Z' - torchscript_onnx_qnn: - inference_time: 562.0 - throughput: 1779.3594306049822 + inference_time: 536.0 + throughput: 1865.6716417910447 estimated_peak_memory_range: - min: 598016 - max: 598016 + min: 1843200 + max: 1843200 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 45 - job_id: jz57dnwn5 + job_id: j2p0er925 job_status: Passed torchscript_onnx_ort: - inference_time: 585.0 - throughput: 1709.4017094017095 + inference_time: 472.0 + throughput: 2118.64406779661 estimated_peak_memory_range: - min: 2117632 - max: 2117632 + min: 2641920 + max: 2641920 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 49 + layers_on_npu: 47 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 49 - job_id: joprydqkg + total_layers: 47 + job_id: j1glek8ep job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.096437Z' + timestamp: '2024-06-08T23:16:54Z' diff --git a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md index e7447ff1..286aab8d 100644 --- a/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md +++ b/qai_hub_models/models/stable_diffusion_v1_5_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Stable-Diffusion-v1.5 can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE)

 ## References
 * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
diff --git a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
index ade8bee8..69677b00 100644
--- a/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
+++ b/qai_hub_models/models/stable_diffusion_v2_1_quantized/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of Stable-Diffusion-v2.1 can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://github.com/CompVis/stable-diffusion/blob/main/LICENSE)

 ## References
 * [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
diff --git a/qai_hub_models/models/stylegan2/README.md b/qai_hub_models/models/stylegan2/README.md
index ea9e6792..6ff284c8 100644
--- a/qai_hub_models/models/stylegan2/README.md
+++ b/qai_hub_models/models/stylegan2/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of StyleGAN2 can be found [here](https://github.com/NVlabs/stylegan3/blob/main/LICENSE.txt).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [Analyzing and Improving the Image Quality of StyleGAN](http://arxiv.org/abs/1912.04958)
diff --git a/qai_hub_models/models/stylegan2/export.py b/qai_hub_models/models/stylegan2/export.py
index 4a1053bc..5a712592 100644
--- a/qai_hub_models/models/stylegan2/export.py
+++ b/qai_hub_models/models/stylegan2/export.py
@@ -179,7 +179,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
@@ -213,7 +213,12 @@ def export_model(

 def main():
     warnings.filterwarnings("ignore")
-    parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False)
+    parser = export_parser(
+        model_cls=Model,
+        supports_qnn=False,
+        supports_ort=False,
+        supports_precompiled_ort=False,
+    )
     args = parser.parse_args()
     export_model(**vars(args))

diff --git a/qai_hub_models/models/stylegan2/perf.yaml b/qai_hub_models/models/stylegan2/perf.yaml
index b308729f..938965f9 100644
--- a/qai_hub_models/models/stylegan2/perf.yaml
+++ b/qai_hub_models/models/stylegan2/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: StyleGAN2
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 1684726.0
-      throughput: 0.5935683309926956
+      inference_time: 1649413.0
+      throughput: 0.6062762934450013
estimated_peak_memory_range: - min: 1399386112 - max: 1408039328 + min: 1397805056 + max: 2230233016 primary_compute_unit: CPU precision: fp32 layer_info: @@ -48,7 +48,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: jqpyd2w0p + job_id: j1p3qm7x5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -57,13 +57,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.139435Z' + timestamp: '2024-06-08T23:17:52Z' - torchscript_onnx_tflite: - inference_time: 1246952.0 - throughput: 0.8019554882625795 + inference_time: 1311471.0 + throughput: 0.7625025639148711 estimated_peak_memory_range: - min: 1057603584 - max: 1090183616 + min: 1184645120 + max: 1218773040 primary_compute_unit: CPU precision: fp32 layer_info: @@ -71,7 +71,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j2p0r970p + job_id: jwgoevw4p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -80,13 +80,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.139505Z' + timestamp: '2024-06-08T23:17:53Z' - torchscript_onnx_tflite: - inference_time: 1690139.0 - throughput: 0.5916673125701496 + inference_time: 1578379.0 + throughput: 0.6335613943165742 estimated_peak_memory_range: - min: 826093568 - max: 2179375456 + min: 1049174016 + max: 1057203192 primary_compute_unit: CPU precision: fp32 layer_info: @@ -94,7 +94,7 @@ models: layers_on_gpu: 78 layers_on_cpu: 402 total_layers: 480 - job_id: j1p87rvq5 + job_id: j1pvzwm7g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -103,12 +103,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.139572Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.139580Z' + timestamp: '2024-06-08T23:17:54Z' diff --git a/qai_hub_models/models/swin_base/README.md b/qai_hub_models/models/swin_base/README.md index e1b53caa..ec878b5d 100644 --- a/qai_hub_models/models/swin_base/README.md +++ b/qai_hub_models/models/swin_base/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Base can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_base/evaluate.py b/qai_hub_models/models/swin_base/evaluate.py new file mode 100644 index 00000000..052bc1fc --- /dev/null +++ b/qai_hub_models/models/swin_base/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_base import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_base/export.py b/qai_hub_models/models/swin_base/export.py index a2591912..79d16b96 100644 --- a/qai_hub_models/models/swin_base/export.py +++ b/qai_hub_models/models/swin_base/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_base/info.yaml b/qai_hub_models/models/swin_base/info.yaml index 00a55170..04918f59 100644 --- a/qai_hub_models/models/swin_base/info.yaml +++ b/qai_hub_models/models/swin_base/info.yaml @@ -44,3 +44,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_base/perf.yaml b/qai_hub_models/models/swin_base/perf.yaml index 848eeced..37ad14fa 100644 --- a/qai_hub_models/models/swin_base/perf.yaml +++ b/qai_hub_models/models/swin_base/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Base performance_metrics: - torchscript_onnx_tflite: - inference_time: 38343.0 - throughput: 26.080379730328875 + inference_time: 38045.0 + throughput: 26.284662899198317 estimated_peak_memory_range: - min: 57344 - max: 3970680 + min: 307200 + max: 3648376 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: j7gjly41p + job_id: jz57vdnq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31373.0 - throughput: 31.874541803461575 + inference_time: 31404.0 + throughput: 31.84307731499172 estimated_peak_memory_range: - min: 16384 - max: 47301400 + min: 57344 + max: 46336408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jz5w9zn4p + job_id: jo5mvzky5 job_status: Passed torchscript_onnx_ort: - inference_time: 64145.0 - throughput: 15.589679632083561 + inference_time: 63106.0 + throughput: 15.846353754001205 estimated_peak_memory_range: - min: 253952 - max: 473800768 + min: 278528 + max: 457269496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 
@@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jz57dn3n5 + job_id: jqpyvdyrp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.157642Z' + timestamp: '2024-06-08T23:18:38Z' - torchscript_onnx_tflite: - inference_time: 26180.0 - throughput: 38.19709702062643 + inference_time: 26266.0 + throughput: 38.07203228508338 estimated_peak_memory_range: - min: 40960 - max: 499707440 + min: 49152 + max: 501753168 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jlpevx385 + job_id: jqp4jw4qp job_status: Passed torchscript_onnx_qnn: - inference_time: 22009.0 - throughput: 45.43595801717479 + inference_time: 22072.0 + throughput: 45.30627038782168 estimated_peak_memory_range: min: 0 - max: 410340528 + max: 409890496 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jmg942em5 + job_id: jegnrewv5 job_status: Passed torchscript_onnx_ort: - inference_time: 44507.0 - throughput: 22.468375761116228 + inference_time: 44119.0 + throughput: 22.66597157687164 estimated_peak_memory_range: - min: 626688 - max: 205027936 + min: 643072 + max: 204011072 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1163 - job_id: jqp4w402g + job_id: j2p0erx25 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.158083Z' + timestamp: '2024-06-08T23:18:39Z' - torchscript_onnx_tflite: - inference_time: 38498.0 - throughput: 25.975375344173724 + inference_time: 38074.0 + throughput: 26.264642538215053 estimated_peak_memory_range: - min: 40960 - max: 3943568 + min: 61440 + max: 4041520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1568 - job_id: jygz7yk4p + job_id: j0pxe1rj5 job_status: Passed torchscript_onnx_qnn: - inference_time: 31489.0 - throughput: 31.757121534504112 + inference_time: 31252.0 + throughput: 31.997952131063613 estimated_peak_memory_range: - min: 49152 - max: 48603792 + min: 61440 + max: 51901248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jvgdv4l6g + job_id: jep23mzxg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.158389Z' + timestamp: '2024-06-08T23:18:37Z' - torchscript_onnx_qnn: - inference_time: 39136.0 - throughput: 25.551921504497138 + inference_time: 38623.0 + throughput: 25.89130828780778 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1255 - job_id: jnp181xng + job_id: jopr1y7vg job_status: Passed torchscript_onnx_ort: - inference_time: 66331.0 - throughput: 15.075907192715322 + inference_time: 65447.0 + throughput: 15.27953916909866 estimated_peak_memory_range: - min: 685400064 - max: 685400064 + min: 552267776 + max: 552267776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 
0 layers_on_cpu: 0 total_layers: 1163 - job_id: j0px1r28g + job_id: j1p8w7kzp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.158657Z' + timestamp: '2024-06-08T23:18:40Z' diff --git a/qai_hub_models/models/swin_small/README.md b/qai_hub_models/models/swin_small/README.md index 01c8a31a..a661caf5 100644 --- a/qai_hub_models/models/swin_small/README.md +++ b/qai_hub_models/models/swin_small/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Small can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_small/evaluate.py b/qai_hub_models/models/swin_small/evaluate.py new file mode 100644 index 00000000..8f1f1388 --- /dev/null +++ b/qai_hub_models/models/swin_small/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_small import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_small/export.py b/qai_hub_models/models/swin_small/export.py index 82947e38..67677bbe 100644 --- a/qai_hub_models/models/swin_small/export.py +++ b/qai_hub_models/models/swin_small/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git 
a/qai_hub_models/models/swin_small/info.yaml b/qai_hub_models/models/swin_small/info.yaml index ac042fe2..2a22d62e 100644 --- a/qai_hub_models/models/swin_small/info.yaml +++ b/qai_hub_models/models/swin_small/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_small/perf.yaml b/qai_hub_models/models/swin_small/perf.yaml index 3253371f..4bd928fb 100644 --- a/qai_hub_models/models/swin_small/perf.yaml +++ b/qai_hub_models/models/swin_small/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Small performance_metrics: - torchscript_onnx_tflite: - inference_time: 29371.0 - throughput: 34.047189404514654 + inference_time: 29054.0 + throughput: 34.41866868589523 estimated_peak_memory_range: - min: 57344 - max: 3729912 + min: 24576 + max: 7976680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jegneq8jg + job_id: jn5q92d7p job_status: Passed torchscript_onnx_qnn: - inference_time: 23540.0 - throughput: 42.48088360237893 + inference_time: 23697.0 + throughput: 42.19943452757733 estimated_peak_memory_range: - min: 49152 - max: 39448744 + min: 0 + max: 40982576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jqpyd200p + job_id: j1p3qmrx5 job_status: Passed torchscript_onnx_ort: - inference_time: 56967.0 - throughput: 17.55402250425685 + inference_time: 56535.0 + throughput: 17.688157778367383 estimated_peak_memory_range: - min: 180224 - max: 257939904 + min: 57344 + max: 250098192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: j1glkqm2p + job_id: jlpe4vn75 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.193426Z' + timestamp: '2024-06-08T23:19:17Z' - torchscript_onnx_tflite: - inference_time: 19697.0 - throughput: 50.76915266284206 + inference_time: 19652.0 + throughput: 50.8854060655404 estimated_peak_memory_range: - min: 36864 - max: 467583248 + min: 45056 + max: 468730016 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: joprydjkg + job_id: j1glekqep job_status: Passed torchscript_onnx_qnn: - inference_time: 16158.0 - throughput: 61.888847629657135 + inference_time: 16097.0 + throughput: 62.123377026775174 estimated_peak_memory_range: min: 0 - max: 376508304 + max: 371590576 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: j1p87kyq5 + job_id: jwgoev94p job_status: Passed torchscript_onnx_ort: - inference_time: 39073.0 - throughput: 25.593120569191 + inference_time: 39326.0 + throughput: 25.42846971469257 estimated_peak_memory_range: - min: 618496 - max: 172619200 + min: 651264 + max: 174791408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: jw56104np + job_id: jygzv70zp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: 
'2024-05-29T18:59:43.193885Z' + timestamp: '2024-06-08T23:19:18Z' - torchscript_onnx_tflite: - inference_time: 29137.0 - throughput: 34.320623262518446 + inference_time: 29025.0 + throughput: 34.45305770887166 estimated_peak_memory_range: - min: 81920 - max: 3564816 + min: 69632 + max: 3142616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1563 - job_id: jep2mdn65 + job_id: jw56q10vg job_status: Passed torchscript_onnx_qnn: - inference_time: 23539.0 - throughput: 42.48268830451591 + inference_time: 23503.0 + throughput: 42.54775986044335 estimated_peak_memory_range: - min: 53248 - max: 40793896 + min: 36864 + max: 38372320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jn5q2dqe5 + job_id: j7gjkl875 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.194196Z' + timestamp: '2024-06-08T23:19:16Z' - torchscript_onnx_qnn: - inference_time: 23958.0 - throughput: 41.73971116119876 + inference_time: 23778.0 + throughput: 42.055681722600724 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1246 - job_id: jogkykxvp + job_id: j1pvzwn7g job_status: Passed torchscript_onnx_ort: - inference_time: 58912.0 - throughput: 16.97447039652363 + inference_time: 58093.0 + throughput: 17.213777907837432 estimated_peak_memory_range: - min: 467292160 - max: 467292160 + min: 385679360 + max: 385679360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1158 - job_id: j1p3mr0mg + job_id: jz5wm9rzg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.194465Z' + timestamp: '2024-06-08T23:19:19Z' diff --git a/qai_hub_models/models/swin_tiny/README.md b/qai_hub_models/models/swin_tiny/README.md index 8549a629..e0733e34 100644 --- a/qai_hub_models/models/swin_tiny/README.md +++ b/qai_hub_models/models/swin_tiny/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Swin-Tiny can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) diff --git a/qai_hub_models/models/swin_tiny/evaluate.py b/qai_hub_models/models/swin_tiny/evaluate.py new file mode 100644 index 00000000..5c7b00e7 --- /dev/null +++ b/qai_hub_models/models/swin_tiny/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.swin_tiny import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/swin_tiny/export.py b/qai_hub_models/models/swin_tiny/export.py index df0a32e2..fbe5734d 100644 --- a/qai_hub_models/models/swin_tiny/export.py +++ b/qai_hub_models/models/swin_tiny/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/swin_tiny/info.yaml b/qai_hub_models/models/swin_tiny/info.yaml index aee47f6a..9a83a696 100644 --- a/qai_hub_models/models/swin_tiny/info.yaml +++ b/qai_hub_models/models/swin_tiny/info.yaml @@ -43,3 +43,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/swin_tiny/perf.yaml b/qai_hub_models/models/swin_tiny/perf.yaml index 79d0a222..7281ba04 100644 --- a/qai_hub_models/models/swin_tiny/perf.yaml +++ b/qai_hub_models/models/swin_tiny/perf.yaml @@ -36,11 +36,11 @@ models: - name: Swin-Tiny performance_metrics: - torchscript_onnx_tflite: - inference_time: 17622.0 - throughput: 56.74724775848372 + inference_time: 17582.0 + throughput: 56.87635081333182 estimated_peak_memory_range: - min: 20480 - max: 3034712 + min: 49152 + max: 3052248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jlpevnm85 + job_id: jnp1q8mkg job_status: Passed torchscript_onnx_qnn: - inference_time: 14919.0 - throughput: 67.02862122126147 + inference_time: 14870.0 + throughput: 67.24949562878278 estimated_peak_memory_range: - min: 49152 - max: 37995888 + min: 40960 + max: 28468704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jmg94qnm5 + job_id: jqp4jw2qp job_status: Passed torchscript_onnx_ort: - inference_time: 34287.0 - throughput: 29.165572957680755 + inference_time: 33752.0 + throughput: 29.627873903768666 estimated_peak_memory_range: - min: 77824 - max: 165377480 + min: 0 + max: 143848064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: jqp4w2r2g + job_id: jopr1ymvg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.229036Z' + timestamp: '2024-06-08T23:19:49Z' - torchscript_onnx_tflite: - inference_time: 11801.0 - throughput: 84.7385814761461 + inference_time: 11836.0 + throughput: 84.48800270361609 estimated_peak_memory_range: min: 40960 - max: 288481344 + max: 291213504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jygz70d4p + job_id: jvgd7vmkg job_status: Passed torchscript_onnx_qnn: - inference_time: 9948.0 - throughput: 100.52271813429836 + inference_time: 9960.0 + throughput: 100.40160642570281 estimated_peak_memory_range: min: 618496 - max: 230153840 + max: 226851856 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jnp18mzng + job_id: j0pxe1zj5 job_status: Passed torchscript_onnx_ort: - inference_time: 23929.0 - throughput: 41.79029629320072 + inference_time: 23820.0 + throughput: 41.98152812762385 estimated_peak_memory_range: - min: 36864 - max: 111906272 + min: 53248 + max: 113324624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 - job_id: j0px1zo8g + job_id: jep23mqxg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.229264Z' + timestamp: '2024-06-08T23:19:50Z' - torchscript_onnx_tflite: - inference_time: 17581.0 - throughput: 56.87958591661453 + inference_time: 17413.0 + throughput: 57.42835812324125 estimated_peak_memory_range: - min: 90112 - max: 2519456 + min: 24576 + max: 3013416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 837 - job_id: jz5w9r64p + job_id: jz57vd8q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 14955.0 - throughput: 66.86726847208291 + inference_time: 14630.0 + throughput: 68.3526999316473 estimated_peak_memory_range: - min: 245760 - max: 27630984 + min: 12288 + max: 29408864 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jz57d8rn5 + job_id: jegnredv5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.229422Z' + timestamp: '2024-06-08T23:19:48Z' - torchscript_onnx_qnn: - inference_time: 14657.0 - throughput: 68.22678583611926 + inference_time: 14162.0 + throughput: 70.61149555147578 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 700 - job_id: jvgdvm16g + job_id: jo5mvzly5 job_status: Passed torchscript_onnx_ort: - inference_time: 35485.0 - throughput: 28.180921516133576 + inference_time: 34948.0 + throughput: 28.613940711914847 estimated_peak_memory_range: - min: 241364992 - max: 241364992 + min: 211316736 + max: 211316736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 624 
-      job_id: jo5mzlx7p
+      job_id: jqpyvdkrp
       job_status: Passed
     reference_device_info:
       name: Snapdragon X Elite CRD
@@ -216,4 +216,4 @@ models:
       os_name: Windows
       manufacturer: Qualcomm
       chipset: Snapdragon® X Elite
-    timestamp: '2024-05-29T18:59:43.229561Z'
+    timestamp: '2024-06-08T23:19:50Z'
diff --git a/qai_hub_models/models/trocr/README.md b/qai_hub_models/models/trocr/README.md
index 8e1b963a..429f2e2f 100644
--- a/qai_hub_models/models/trocr/README.md
+++ b/qai_hub_models/models/trocr/README.md
@@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub.

 ## License
 - The license for the original implementation of TrOCR can be found [here](https://github.com/microsoft/unilm/blob/master/LICENSE).
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url})
+- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf)

 ## References
 * [TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models](https://arxiv.org/abs/2109.10282)
diff --git a/qai_hub_models/models/trocr/export.py b/qai_hub_models/models/trocr/export.py
index 8b74261b..7c62002b 100644
--- a/qai_hub_models/models/trocr/export.py
+++ b/qai_hub_models/models/trocr/export.py
@@ -194,7 +194,7 @@ def export_model(
         target_runtime_extension = "so"
     elif target_runtime == TargetRuntime.TFLITE:
         target_runtime_extension = "tflite"
-    elif target_runtime == TargetRuntime.ORT:
+    elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}:
         target_runtime_extension = "onnx"

     os.makedirs(output_path, exist_ok=True)
@@ -237,7 +237,10 @@ def export_model(
 def main():
     warnings.filterwarnings("ignore")
     parser = export_parser(
-        model_cls=Model, components=ALL_COMPONENTS, supports_qnn=False
+        model_cls=Model,
+        components=ALL_COMPONENTS,
+        supports_qnn=False,
+        supports_precompiled_ort=False,
     )
     args = parser.parse_args()
     export_model(**vars(args))
diff --git a/qai_hub_models/models/trocr/perf.yaml b/qai_hub_models/models/trocr/perf.yaml
index 521a6a8a..25bfdd7d 100644
--- a/qai_hub_models/models/trocr/perf.yaml
+++ b/qai_hub_models/models/trocr/perf.yaml
@@ -36,11 +36,11 @@ models:
 - name: TrOCREncoder
   performance_metrics:
   - torchscript_onnx_tflite:
-      inference_time: 149720.0
-      throughput: 6.67913438418381
+      inference_time: 148428.0
+      throughput: 6.737273290753766
       estimated_peak_memory_range:
-        min: 7241728
-        max: 10559328
+        min: 6459392
+        max: 9952352
       primary_compute_unit: NPU
       precision: fp16
       layer_info:
@@ -48,14 +48,14 @@ models:
         layers_on_gpu: 0
         layers_on_cpu: 0
         total_layers: 592
-      job_id: jopry73kg
+      job_id: j1p8w7dzp
       job_status: Passed
     torchscript_onnx_ort:
-      inference_time: 111138.0
-      throughput: 8.997822526948479
+      inference_time: 109810.0
+      throughput: 9.106638739641198
       estimated_peak_memory_range:
-        min: 14254080
-        max: 129724112
+        min: 14303232
+        max: 127415872
9.00276384850149 estimated_peak_memory_range: - min: 5992448 - max: 348296944 + min: 6410240 + max: 350751520 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: jqpydy30p + job_id: jn5q92x7p job_status: Passed torchscript_onnx_ort: - inference_time: 84470.0 - throughput: 11.838522552385463 + inference_time: 83685.0 + throughput: 11.9495728027723 estimated_peak_memory_range: - min: 16490496 - max: 91372800 + min: 12636160 + max: 89203248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jygz7034p + job_id: jvgd7vykg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.263862Z' + timestamp: '2024-06-08T23:20:37Z' - torchscript_onnx_tflite: - inference_time: 149520.0 - throughput: 6.688068485821295 + inference_time: 148360.0 + throughput: 6.740361283364789 estimated_peak_memory_range: - min: 16384 - max: 12280096 + min: 7380992 + max: 9974128 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 592 - job_id: j1p87kqq5 + job_id: jw56q19vg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.263936Z' + timestamp: '2024-06-08T23:20:26Z' - torchscript_onnx_ort: - inference_time: 111711.0 - throughput: 8.951669934026192 + inference_time: 109878.0 + throughput: 9.101002930522943 estimated_peak_memory_range: - min: 36442112 - max: 36442112 + min: 28672 + max: 28672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 396 - job_id: jmg94qlm5 + job_id: jmg9947vg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,15 +156,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.264003Z' + timestamp: '2024-06-08T23:20:38Z' - name: TrOCRDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 2734.0 - throughput: 365.764447695684 + inference_time: 2732.0 + throughput: 366.03221083455344 estimated_peak_memory_range: - min: 16384 - max: 2242304 + min: 12288 + max: 2455200 primary_compute_unit: NPU precision: fp16 layer_info: @@ -172,14 +172,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jep2mzy65 + job_id: jogkrywy5 job_status: Passed torchscript_onnx_ort: - inference_time: 2986.0 - throughput: 334.8961821835231 + inference_time: 2915.0 + throughput: 343.0531732418525 estimated_peak_memory_range: - min: 122880 - max: 562369864 + min: 28672 + max: 588384064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,7 +187,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jlpevnd85 + job_id: jnp1q8kkg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -196,13 +196,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.264092Z' + timestamp: '2024-06-08T23:20:35Z' - torchscript_onnx_tflite: - inference_time: 1972.0 - throughput: 507.0993914807302 + inference_time: 1997.0 + throughput: 500.75112669003505 estimated_peak_memory_range: min: 12288 - max: 193171136 + max: 
195170736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -210,14 +210,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: j2p0rxz0p + job_id: j1glek9ep job_status: Passed torchscript_onnx_ort: - inference_time: 2087.0 - throughput: 479.1566842357451 + inference_time: 2106.0 + throughput: 474.8338081671415 estimated_peak_memory_range: min: 0 - max: 47494016 + max: 49553392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -225,7 +225,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jz5w9re4p + job_id: jz5wm90jg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -234,13 +234,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.264179Z' + timestamp: '2024-06-08T23:20:37Z' - torchscript_onnx_tflite: - inference_time: 2738.0 - throughput: 365.23009495982467 + inference_time: 2737.0 + throughput: 365.36353671903544 estimated_peak_memory_range: min: 16384 - max: 2113536 + max: 3465512 primary_compute_unit: NPU precision: fp16 layer_info: @@ -248,7 +248,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 370 - job_id: jogkykevp + job_id: j1p3qmlx5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -257,13 +257,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.264226Z' + timestamp: '2024-06-08T23:20:27Z' - torchscript_onnx_ort: - inference_time: 2608.0 - throughput: 383.4355828220859 + inference_time: 2812.0 + throughput: 355.6187766714082 estimated_peak_memory_range: - min: 356294656 - max: 356294656 + min: 352550912 + max: 352550912 primary_compute_unit: NPU precision: fp16 layer_info: @@ -271,7 +271,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 353 - job_id: jnp18m4ng + job_id: jnp1q8klg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -280,4 +280,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.264271Z' + timestamp: '2024-06-08T23:20:39Z' diff --git a/qai_hub_models/models/unet_segmentation/README.md b/qai_hub_models/models/unet_segmentation/README.md index 78dfce4f..c0d3d342 100644 --- a/qai_hub_models/models/unet_segmentation/README.md +++ b/qai_hub_models/models/unet_segmentation/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Unet-Segmentation can be found [here](https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE) ## References * [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) diff --git a/qai_hub_models/models/unet_segmentation/export.py b/qai_hub_models/models/unet_segmentation/export.py index 27cd31cb..6274534f 100644 --- a/qai_hub_models/models/unet_segmentation/export.py +++ b/qai_hub_models/models/unet_segmentation/export.py @@ -187,7 +187,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/unet_segmentation/perf.yaml b/qai_hub_models/models/unet_segmentation/perf.yaml index 51abe949..b4a6ea65 100644 --- a/qai_hub_models/models/unet_segmentation/perf.yaml +++ b/qai_hub_models/models/unet_segmentation/perf.yaml @@ -36,11 +36,11 @@ models: - name: Unet-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 155816.0 - throughput: 6.417826153925143 + inference_time: 159228.0 + throughput: 6.280302459366443 estimated_peak_memory_range: - min: 6438912 - max: 229049848 + min: 6418432 + max: 111435960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jqp4w2l2g + job_id: jo5mvz7q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 150601.0 - throughput: 6.640062150981733 + inference_time: 156519.0 + throughput: 6.389000696401076 estimated_peak_memory_range: - min: 10506240 - max: 32628664 + min: 9871360 + max: 31082800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jegnew6jg + job_id: jep23m1mg job_status: Passed torchscript_onnx_ort: - inference_time: 160595.0 - throughput: 6.226843924157041 + inference_time: 165647.0 + throughput: 6.03693396197939 estimated_peak_memory_range: - min: 7450624 - max: 152630040 + min: 13611008 + max: 154509064 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j2p0rx40p + job_id: jn5q92nmp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.309399Z' + timestamp: '2024-06-08T23:21:28Z' - torchscript_onnx_tflite: - inference_time: 120918.0 - throughput: 8.270067318347971 + inference_time: 121153.0 + throughput: 8.254025901133279 estimated_peak_memory_range: - min: 5656576 - max: 336826176 + min: 6619136 + max: 339596672 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: j0px1zk8g + job_id: jegnre4m5 job_status: Passed torchscript_onnx_qnn: - inference_time: 110216.0 - throughput: 9.073092835885896 + inference_time: 110026.0 + throughput: 9.0887608383473 estimated_peak_memory_range: - min: 9908224 - max: 92426640 + min: 9850880 + max: 91369248 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: 
layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jopry7vkg + job_id: j2p0erwe5 job_status: Passed torchscript_onnx_ort: - inference_time: 120104.0 - throughput: 8.326117364950376 + inference_time: 119057.0 + throughput: 8.399338132155187 estimated_peak_memory_range: - min: 22466560 - max: 102883024 + min: 22478848 + max: 104785056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: j1p87k2q5 + job_id: j1glekdlp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.309447Z' + timestamp: '2024-06-08T23:21:30Z' - torchscript_onnx_tflite: - inference_time: 169891.0 - throughput: 5.886126987303624 + inference_time: 157133.0 + throughput: 6.364035562230722 estimated_peak_memory_range: - min: 4673536 - max: 9033328 + min: 6680576 + max: 111633312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 31 - job_id: jo5mzln7p + job_id: jopr1yreg job_status: Passed torchscript_onnx_qnn: - inference_time: 154323.0 - throughput: 6.479915501901855 + inference_time: 148329.0 + throughput: 6.741769984291676 estimated_peak_memory_range: - min: 10018816 - max: 32111504 + min: 9969664 + max: 32982776 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jqpydy10p + job_id: jogkry1o5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.309477Z' + timestamp: '2024-06-08T23:21:27Z' - torchscript_onnx_qnn: - inference_time: 190382.0 - throughput: 5.252597409418958 + inference_time: 190476.0 + throughput: 5.25000525000525 estimated_peak_memory_range: - min: 9850880 - max: 9850880 + min: 9854976 + max: 9854976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 51 - job_id: jep2mzk65 + job_id: j1p8w7n8p job_status: Passed torchscript_onnx_ort: - inference_time: 146588.0 - throughput: 6.821840805522962 + inference_time: 146401.0 + throughput: 6.830554436103578 estimated_peak_memory_range: - min: 11423744 - max: 11423744 + min: 17457152 + max: 17457152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 53 - job_id: jogkykvvp + job_id: jw56q1x7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.309509Z' + timestamp: '2024-06-08T23:21:31Z' diff --git a/qai_hub_models/models/vit/README.md b/qai_hub_models/models/vit/README.md index 314d20ef..924b05f0 100644 --- a/qai_hub_models/models/vit/README.md +++ b/qai_hub_models/models/vit/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of VIT can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) diff --git a/qai_hub_models/models/vit/evaluate.py b/qai_hub_models/models/vit/evaluate.py new file mode 100644 index 00000000..91e37600 --- /dev/null +++ b/qai_hub_models/models/vit/evaluate.py @@ -0,0 +1,56 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. + + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.vit import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + supports_qnn=False, + supports_precompiled_ort=False, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/vit/export.py b/qai_hub_models/models/vit/export.py index de6eba4d..a4f94916 100644 --- a/qai_hub_models/models/vit/export.py +++ b/qai_hub_models/models/vit/export.py @@ -190,7 +190,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -219,7 +219,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/vit/info.yaml b/qai_hub_models/models/vit/info.yaml index 6667f41f..ac7afa54 100644 --- a/qai_hub_models/models/vit/info.yaml +++ b/qai_hub_models/models/vit/info.yaml @@ -42,3 +42,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/vit/perf.yaml b/qai_hub_models/models/vit/perf.yaml index 2de2633f..459d6f2d 100644 --- a/qai_hub_models/models/vit/perf.yaml +++ 
b/qai_hub_models/models/vit/perf.yaml @@ -36,11 +36,11 @@ models: - name: VIT performance_metrics: - torchscript_onnx_tflite: - inference_time: 79254.0 - throughput: 12.617659676483205 + inference_time: 78496.0 + throughput: 12.73950264981655 estimated_peak_memory_range: - min: 139264 - max: 3176768 + min: 102400 + max: 3437176 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1glkq42p + job_id: jwgoevxdp job_status: Passed torchscript_onnx_ort: - inference_time: 104122.0 - throughput: 9.604118245903843 + inference_time: 103100.0 + throughput: 9.699321047526674 estimated_peak_memory_range: - min: 32768 - max: 419050688 + min: 110592 + max: 441770400 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jygz7024p + job_id: jnp1q89lg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.343795Z' + timestamp: '2024-06-08T23:22:03Z' - torchscript_onnx_tflite: - inference_time: 56896.0 - throughput: 17.575928008998876 + inference_time: 56654.0 + throughput: 17.65100434214707 estimated_peak_memory_range: - min: 114688 - max: 373059376 + min: 77824 + max: 375276272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: jw56102np + job_id: j1pvzw8mg job_status: Passed torchscript_onnx_ort: - inference_time: 75468.0 - throughput: 13.250649281814809 + inference_time: 76545.0 + throughput: 13.064210595074792 estimated_peak_memory_range: - min: 622592 - max: 510131728 + min: 684032 + max: 513094432 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jz5w9rw4p + job_id: jvgd7vklg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.343903Z' + timestamp: '2024-06-08T23:22:03Z' - torchscript_onnx_tflite: - inference_time: 79120.0 - throughput: 12.639029322548028 + inference_time: 78627.0 + throughput: 12.718277436504 estimated_peak_memory_range: - min: 131072 - max: 3222496 + min: 110592 + max: 6215968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 535 - job_id: j1p3mrnmg + job_id: j7gjkl985 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.343996Z' + timestamp: '2024-06-08T23:21:57Z' - torchscript_onnx_ort: - inference_time: 103336.0 - throughput: 9.677169621429124 + inference_time: 102862.0 + throughput: 9.721763138962883 estimated_peak_memory_range: - min: 186150912 - max: 186150912 + min: 158560256 + max: 158560256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 376 - job_id: jmg94q0m5 + job_id: jz57vdqr5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.344048Z' + timestamp: '2024-06-08T23:22:04Z' diff --git 
a/qai_hub_models/models/whisper_base_en/README.md b/qai_hub_models/models/whisper_base_en/README.md index d751e49d..441351db 100644 --- a/qai_hub_models/models/whisper_base_en/README.md +++ b/qai_hub_models/models/whisper_base_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Base-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_base_en/export.py b/qai_hub_models/models/whisper_base_en/export.py index 095bc0e3..4bb6b358 100644 --- a/qai_hub_models/models/whisper_base_en/export.py +++ b/qai_hub_models/models/whisper_base_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_base_en/perf.yaml b/qai_hub_models/models/whisper_base_en/perf.yaml index ecd7e150..881707cf 100644 --- a/qai_hub_models/models/whisper_base_en/perf.yaml +++ b/qai_hub_models/models/whisper_base_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 159634.0 - throughput: 6.264329654083716 + inference_time: 158811.0 + throughput: 6.296793043303046 estimated_peak_memory_range: - min: 30449664 - max: 132588528 + min: 31092736 + max: 131633968 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jvgdvmn6g + job_id: j0pxe1w95 job_status: Passed torchscript_onnx_qnn: - inference_time: 605553.0 - throughput: 1.6513831159287462 + inference_time: 624615.0 + throughput: 1.6009862075038224 estimated_peak_memory_range: - min: 1105920 - max: 76836912 + min: 131072 + max: 82142360 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jqp4w2nqg + job_id: j2p0erne5 job_status: Passed torchscript_onnx_ort: - inference_time: 429986.0 - throughput: 2.3256571144176785 + inference_time: 394348.0 + throughput: 2.5358312962155254 estimated_peak_memory_range: - min: 73736192 - max: 249023480 + min: 4792320 + max: 165488160 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: j1p87k0z5 + job_id: j1pvzwjmg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.369854Z' + timestamp: '2024-06-08T23:22:51Z' - torchscript_onnx_tflite: - inference_time: 123349.0 - throughput: 8.107078290055046 + inference_time: 122023.0 + throughput: 8.195176319218508 estimated_peak_memory_range: - min: 17702912 - max: 63075712 + min: 37249024 
+ max: 82154976 primary_compute_unit: GPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jmg94q0q5 + job_id: jegnrejm5 job_status: Passed torchscript_onnx_qnn: - inference_time: 457237.0 - throughput: 2.187049604472079 + inference_time: 452457.0 + throughput: 2.210154777139043 estimated_peak_memory_range: min: 0 - max: 198167856 + max: 198495008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jo5mzleyp + job_id: jogkryjo5 job_status: Passed torchscript_onnx_ort: - inference_time: 302992.0 - throughput: 3.3004171727306333 + inference_time: 300384.0 + throughput: 3.3290721210184295 estimated_peak_memory_range: - min: 74620928 - max: 277528096 + min: 62181376 + max: 262749552 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jn5q2de75 + job_id: jlpe4vj05 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.370015Z' + timestamp: '2024-06-08T23:22:52Z' - torchscript_onnx_tflite: - inference_time: 158470.0 - throughput: 6.310342651605982 + inference_time: 158001.0 + throughput: 6.329073866621098 estimated_peak_memory_range: min: 12288 - max: 93808464 + max: 104601560 primary_compute_unit: GPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 419 layers_on_cpu: 0 total_layers: 419 - job_id: jvgdvmnkg + job_id: jep23m2mg job_status: Passed torchscript_onnx_qnn: - inference_time: 632689.0 - throughput: 1.58055537554786 + inference_time: 623834.0 + throughput: 1.602990539149838 estimated_peak_memory_range: - min: 77824 - max: 80008712 + min: 139264 + max: 76510216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 580 - job_id: jqpydyrrp + job_id: j1p3qmyz5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.370125Z' + timestamp: '2024-06-08T23:22:49Z' - torchscript_onnx_qnn: - inference_time: 463047.0 - throughput: 2.159607987958026 + inference_time: 454926.0 + throughput: 2.198159700698575 estimated_peak_memory_range: min: 962560 max: 962560 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 579 - job_id: jopry78vg + job_id: j1glekjlp job_status: Passed torchscript_onnx_ort: - inference_time: 389738.0 - throughput: 2.5658262730347055 + inference_time: 383597.0 + throughput: 2.606902556589337 estimated_peak_memory_range: - min: 138715136 - max: 138715136 + min: 139669504 + max: 139669504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 380 - job_id: jw5610evp + job_id: jz5wm9jjg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,15 +216,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.370231Z' + timestamp: '2024-06-08T23:22:54Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 24968.0 - throughput: 40.05126561999359 + inference_time: 24389.0 + throughput: 41.00209110664644 estimated_peak_memory_range: - min: 5763072 - max: 
8595680 + min: 5771264 + max: 8649416 primary_compute_unit: NPU precision: fp16 layer_info: @@ -232,14 +232,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jz5w9rwzp + job_id: jo5mvzjq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 23886.0 - throughput: 41.86552792430712 + inference_time: 22769.0 + throughput: 43.91936404760859 estimated_peak_memory_range: - min: 42430464 - max: 59718408 + min: 42414080 + max: 60923784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -247,14 +247,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j0px1z9jg + job_id: j1p8w7l8p job_status: Passed torchscript_onnx_ort: - inference_time: 24467.0 - throughput: 40.87137777414477 + inference_time: 24751.0 + throughput: 40.402407983515815 estimated_peak_memory_range: - min: 20480 - max: 320053784 + min: 12656640 + max: 328987984 primary_compute_unit: NPU precision: fp16 layer_info: @@ -262,7 +262,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: jogkyk7yp + job_id: j7gjklj85 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -271,13 +271,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.370494Z' + timestamp: '2024-06-08T23:22:51Z' - torchscript_onnx_tflite: - inference_time: 19456.0 - throughput: 51.39802631578947 + inference_time: 18854.0 + throughput: 53.039142887450936 estimated_peak_memory_range: - min: 4390912 - max: 91122240 + min: 4575232 + max: 93812240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,14 +285,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jnp18m2kg + job_id: jopr1yzeg job_status: Passed torchscript_onnx_qnn: - inference_time: 18796.0 - throughput: 53.20280910832092 + inference_time: 18709.0 + throughput: 53.450211128333954 estimated_peak_memory_range: - min: 42418176 - max: 323686720 + min: 42438656 + max: 323848592 primary_compute_unit: NPU precision: fp16 layer_info: @@ -300,14 +300,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jegnewlvg + job_id: jn5q92jmp job_status: Passed torchscript_onnx_ort: - inference_time: 20598.0 - throughput: 48.548402757549276 + inference_time: 20257.0 + throughput: 49.36565137976996 estimated_peak_memory_range: - min: 52908032 - max: 139842400 + min: 52916224 + max: 140494080 primary_compute_unit: NPU precision: fp16 layer_info: @@ -315,7 +315,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j1glkq6ep + job_id: jygzv716p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -324,13 +324,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.370757Z' + timestamp: '2024-06-08T23:22:53Z' - torchscript_onnx_tflite: - inference_time: 23198.0 - throughput: 43.10716441072506 + inference_time: 23324.0 + throughput: 42.87429257417253 estimated_peak_memory_range: - min: 5951488 - max: 11437768 + min: 5750784 + max: 9075392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -338,14 +338,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 983 - job_id: jz57d82q5 + job_id: jqpyvd94p job_status: Passed torchscript_onnx_qnn: - inference_time: 23510.0 - throughput: 42.53509145044662 + inference_time: 24053.0 + throughput: 41.57485552737704 estimated_peak_memory_range: - min: 42409984 - max: 58496416 + min: 42450944 + max: 59016968 primary_compute_unit: NPU precision: fp16 layer_info: @@ -353,7 +353,7 @@ 
models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: j2p0rx32p + job_id: jwgoevjdp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -362,13 +362,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.370934Z' + timestamp: '2024-06-08T23:22:49Z' - torchscript_onnx_qnn: - inference_time: 13714.0 - throughput: 72.91818579553741 + inference_time: 13816.0 + throughput: 72.37984944991314 estimated_peak_memory_range: - min: 42463232 - max: 42463232 + min: 42455040 + max: 42455040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -376,14 +376,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 821 - job_id: jep2mz0x5 + job_id: jw56q1k7g job_status: Passed torchscript_onnx_ort: - inference_time: 19806.0 - throughput: 50.48975058063213 + inference_time: 20016.0 + throughput: 49.96003197442047 estimated_peak_memory_range: - min: 112349184 - max: 112349184 + min: 45969408 + max: 45969408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -391,7 +391,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 844 - job_id: j1p3mrvxg + job_id: jmg9946vg job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -400,4 +400,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.371105Z' + timestamp: '2024-06-08T23:22:55Z' diff --git a/qai_hub_models/models/whisper_base_en/requirements.txt b/qai_hub_models/models/whisper_base_en/requirements.txt index 75b1cf12..fa34d4f8 100644 --- a/qai_hub_models/models/whisper_base_en/requirements.txt +++ b/qai_hub_models/models/whisper_base_en/requirements.txt @@ -1,2 +1,2 @@ openai-whisper==20230314 -scipy +scipy==1.8.1 diff --git a/qai_hub_models/models/whisper_small_en/README.md b/qai_hub_models/models/whisper_small_en/README.md index f0f96498..5a1422a0 100644 --- a/qai_hub_models/models/whisper_small_en/README.md +++ b/qai_hub_models/models/whisper_small_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Small-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_small_en/export.py b/qai_hub_models/models/whisper_small_en/export.py index dc34702d..e6937074 100644 --- a/qai_hub_models/models/whisper_small_en/export.py +++ b/qai_hub_models/models/whisper_small_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_small_en/perf.yaml b/qai_hub_models/models/whisper_small_en/perf.yaml index d14f6b03..aae6bde6 100644 --- a/qai_hub_models/models/whisper_small_en/perf.yaml +++ b/qai_hub_models/models/whisper_small_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 617428.0 - throughput: 1.6196220449995788 + inference_time: 610635.0 + throughput: 1.6376395064154528 estimated_peak_memory_range: - min: 48623616 - max: 497398832 + min: 8286208 + max: 437557824 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,22 +48,22 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: j7gjl8z7p + job_id: jz57vdzr5 job_status: Passed - torchscript_onnx_ort: - inference_time: 1710031.0 - throughput: 0.5847847202770008 + torchscript_onnx_qnn: + inference_time: 1969063.0 + throughput: 0.5078557669307686 estimated_peak_memory_range: - min: 110481408 - max: 507852736 + min: 1097728 + max: 226008440 primary_compute_unit: NPU precision: fp16 layer_info: - layers_on_npu: 884 + layers_on_npu: 1474 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 884 - job_id: jqpydyjrp + total_layers: 1474 + job_id: jep23m8mg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.470469Z' + timestamp: '2024-06-08T23:24:00Z' - torchscript_onnx_tflite: - inference_time: 471828.0 - throughput: 2.119416397500784 + inference_time: 467725.0 + throughput: 2.1380084451333583 estimated_peak_memory_range: - min: 110387200 - max: 210216224 + min: 111644672 + max: 209573760 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jygz70ozp + job_id: j0pxe1v95 job_status: Passed torchscript_onnx_qnn: - inference_time: 1470022.0 - throughput: 0.6802619280527774 + inference_time: 1435234.0 + throughput: 0.6967504950412268 estimated_peak_memory_range: min: 0 - max: 568505552 + max: 570396624 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1474 - job_id: jqp4w2kqg + job_id: j2p0erye5 job_status: Passed torchscript_onnx_ort: - inference_time: 1253665.0 - throughput: 0.7976612571939075 + inference_time: 1240429.0 + throughput: 0.8061727031535058 estimated_peak_memory_range: - min: 111489024 - max: 
678294336 + min: 350531584 + max: 914876112 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,7 +116,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: j1p87kmz5 + job_id: j1pvzw3mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -125,13 +125,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.470810Z' + timestamp: '2024-06-08T23:24:10Z' - torchscript_onnx_tflite: - inference_time: 610437.0 - throughput: 1.6381706875566193 + inference_time: 611130.0 + throughput: 1.6363130594145272 estimated_peak_memory_range: - min: 0 - max: 449077216 + min: 68825088 + max: 504071032 primary_compute_unit: GPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 911 layers_on_cpu: 0 total_layers: 911 - job_id: jmg94qjq5 + job_id: jegnre2m5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.470911Z' + timestamp: '2024-06-08T23:23:58Z' - torchscript_onnx_qnn: - inference_time: 1702121.0 - throughput: 0.5875022986027433 + inference_time: 1682160.0 + throughput: 0.5944737718171874 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,14 +162,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 1473 - job_id: jo5mzlqyp + job_id: jogkryzo5 job_status: Passed torchscript_onnx_ort: - inference_time: 1516833.0 - throughput: 0.6592683571625881 + inference_time: 1497981.0 + throughput: 0.667565209438571 estimated_peak_memory_range: - min: 74731520 - max: 74731520 + min: 555839488 + max: 555839488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -177,7 +177,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 884 - job_id: jn5q2dr75 + job_id: jlpe4v905 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -186,15 +186,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.471142Z' + timestamp: '2024-06-08T23:24:12Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 26398.0 - throughput: 37.88165770134101 + inference_time: 26644.0 + throughput: 37.53190211679928 estimated_peak_memory_range: - min: 16756736 - max: 20661456 + min: 16855040 + max: 20865456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jlpevne75 + job_id: jqp4jwqlp job_status: Passed torchscript_onnx_qnn: - inference_time: 25326.0 - throughput: 39.48511411197978 + inference_time: 24731.0 + throughput: 40.43508147668918 estimated_peak_memory_range: - min: 132386816 - max: 205732832 + min: 124076032 + max: 200059296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,22 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jz57d80q5 - job_status: Passed - torchscript_onnx_ort: - inference_time: 62664.0 - throughput: 15.958125877696924 - estimated_peak_memory_range: - min: 53420032 - max: 713755144 - primary_compute_unit: NPU - precision: fp16 - layer_info: - layers_on_npu: 2302 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 2302 - job_id: j2p0rx22p + job_id: jqpyvde4p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -241,13 +226,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.471830Z' + timestamp: 
'2024-06-08T23:24:00Z' - torchscript_onnx_tflite: - inference_time: 20598.0 - throughput: 48.548402757549276 + inference_time: 19793.0 + throughput: 50.52291214065579 estimated_peak_memory_range: - min: 16793600 - max: 1152170896 + min: 16777216 + max: 1154461280 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2573 - job_id: jz5w9r2zp + job_id: jo5mvzrq5 job_status: Passed torchscript_onnx_qnn: - inference_time: 19885.0 - throughput: 50.28916268544129 + inference_time: 19453.0 + throughput: 51.40595280933532 estimated_peak_memory_range: - min: 12111872 - max: 804591888 + min: 72151040 + max: 864487680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,14 +255,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: j0px1znjg + job_id: j1p8w7o8p job_status: Passed torchscript_onnx_ort: - inference_time: 53383.0 - throughput: 18.732555307869546 + inference_time: 53273.0 + throughput: 18.77123495954799 estimated_peak_memory_range: - min: 90411008 - max: 358314384 + min: 50139136 + max: 319234896 primary_compute_unit: NPU precision: fp16 layer_info: @@ -285,7 +270,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: jogkykqyp + job_id: j7gjklx85 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -294,13 +279,28 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.472545Z' - - torchscript_onnx_qnn: - inference_time: 25743.0 - throughput: 38.845511401157594 + timestamp: '2024-06-08T23:24:11Z' + - torchscript_onnx_tflite: + inference_time: 27029.0 + throughput: 36.997299197158604 + estimated_peak_memory_range: + min: 16769024 + max: 20284792 + primary_compute_unit: NPU + precision: fp16 + layer_info: + layers_on_npu: 2573 + layers_on_gpu: 0 + layers_on_cpu: 0 + total_layers: 2573 + job_id: jopr1ykeg + job_status: Passed + torchscript_onnx_qnn: + inference_time: 25818.0 + throughput: 38.73266713145867 estimated_peak_memory_range: - min: 127111168 - max: 202162640 + min: 127201280 + max: 197556544 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +308,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jep2mz9x5 + job_id: jw56q167g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +317,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.472758Z' + timestamp: '2024-06-08T23:24:07Z' - torchscript_onnx_qnn: - inference_time: 20785.0 - throughput: 48.11161895597787 + inference_time: 20402.0 + throughput: 49.01480247034605 estimated_peak_memory_range: - min: 127361024 - max: 127361024 + min: 127381504 + max: 127381504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +331,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2255 - job_id: jegnewmvg + job_id: jn5q928mp job_status: Passed torchscript_onnx_ort: - inference_time: 53319.0 - throughput: 18.7550404171121 + inference_time: 53485.0 + throughput: 18.696830887164626 estimated_peak_memory_range: - min: 309313536 - max: 309313536 + min: 342065152 + max: 342065152 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +346,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 2302 - job_id: j1glkq2ep + job_id: jygzv7e6p job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -355,4 +355,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite 
- timestamp: '2024-05-29T18:59:43.473190Z' + timestamp: '2024-06-08T23:24:13Z' diff --git a/qai_hub_models/models/whisper_tiny_en/README.md b/qai_hub_models/models/whisper_tiny_en/README.md index e541696e..2ce1b0c2 100644 --- a/qai_hub_models/models/whisper_tiny_en/README.md +++ b/qai_hub_models/models/whisper_tiny_en/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Whisper-Tiny-En can be found [here](https://github.com/openai/whisper/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf) diff --git a/qai_hub_models/models/whisper_tiny_en/export.py b/qai_hub_models/models/whisper_tiny_en/export.py index 4c2d1226..050e09fa 100644 --- a/qai_hub_models/models/whisper_tiny_en/export.py +++ b/qai_hub_models/models/whisper_tiny_en/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/whisper_tiny_en/perf.yaml b/qai_hub_models/models/whisper_tiny_en/perf.yaml index 08635966..cf5d7cdb 100644 --- a/qai_hub_models/models/whisper_tiny_en/perf.yaml +++ b/qai_hub_models/models/whisper_tiny_en/perf.yaml @@ -36,11 +36,11 @@ models: - name: WhisperEncoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 68848.0 - throughput: 14.524750174297003 + inference_time: 68470.0 + throughput: 14.604936468526361 estimated_peak_memory_range: - min: 12288 - max: 48238640 + min: 16613376 + max: 64496288 primary_compute_unit: GPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jwgov9n45 + job_id: jnp1q80lg job_status: Passed torchscript_onnx_qnn: - inference_time: 287627.0 - throughput: 3.4767250640586593 + inference_time: 286944.0 + throughput: 3.485000557600089 estimated_peak_memory_range: - min: 1114112 - max: 47243920 + min: 1019904 + max: 52873616 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jmg94qyq5 + job_id: jegnreym5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.534187Z' + timestamp: '2024-06-08T23:24:44Z' - torchscript_onnx_tflite: - inference_time: 53307.0 - throughput: 18.75926238580299 + inference_time: 54112.0 + throughput: 18.48018923713779 estimated_peak_memory_range: min: 0 - max: 32147600 + max: 36724816 primary_compute_unit: GPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: j7gjl827p + job_id: jz57vdwr5 job_status: Passed torchscript_onnx_qnn: - inference_time: 217027.0 - throughput: 4.607721619890612 + inference_time: 218003.0 + throughput: 4.587092838171952 
estimated_peak_memory_range: - min: 974848 - max: 136277600 + min: 406650880 + max: 543573456 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jvgdvmqkg + job_id: jep23m6mg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.534272Z' + timestamp: '2024-06-08T23:24:46Z' - torchscript_onnx_tflite: - inference_time: 68587.0 - throughput: 14.580022453234578 + inference_time: 68514.0 + throughput: 14.595557112414982 estimated_peak_memory_range: - min: 12288 - max: 47889640 + min: 18030592 + max: 66868584 primary_compute_unit: GPU precision: fp16 layer_info: @@ -124,14 +124,14 @@ models: layers_on_gpu: 271 layers_on_cpu: 0 total_layers: 271 - job_id: jygz70jzp + job_id: j0pxe1j95 job_status: Passed torchscript_onnx_qnn: - inference_time: 293684.0 - throughput: 3.405020362021765 + inference_time: 288936.0 + throughput: 3.4609740565384723 estimated_peak_memory_range: - min: 57344 - max: 51548904 + min: 159744 + max: 53294424 primary_compute_unit: NPU precision: fp16 layer_info: @@ -139,7 +139,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 338 - job_id: jo5mzl6yp + job_id: jogkryno5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -148,10 +148,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.534349Z' + timestamp: '2024-06-08T23:24:50Z' - torchscript_onnx_qnn: - inference_time: 239161.0 - throughput: 4.181283737733159 + inference_time: 237871.0 + throughput: 4.203959288858247 estimated_peak_memory_range: min: 962560 max: 962560 @@ -162,7 +162,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jqp4w2dqg + job_id: j2p0erqe5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -171,15 +171,15 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.534395Z' + timestamp: '2024-06-08T23:24:48Z' - name: WhisperDecoder performance_metrics: - torchscript_onnx_tflite: - inference_time: 3779.0 - throughput: 264.6202699126753 + inference_time: 3853.0 + throughput: 259.53802232026993 estimated_peak_memory_range: - min: 2977792 - max: 7682616 + min: 2973696 + max: 6011536 primary_compute_unit: NPU precision: fp16 layer_info: @@ -187,14 +187,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: j1pvwnr7g + job_id: jvgd7vwlg job_status: Passed torchscript_onnx_qnn: - inference_time: 3638.0 - throughput: 274.8763056624519 + inference_time: 3672.0 + throughput: 272.33115468409585 estimated_peak_memory_range: - min: 9932800 - max: 46815096 + min: 21250048 + max: 48536944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -202,14 +202,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jnp18mwkg + job_id: jopr1yqeg job_status: Passed torchscript_onnx_ort: - inference_time: 5435.0 - throughput: 183.99264029438822 + inference_time: 5299.0 + throughput: 188.71485185884129 estimated_peak_memory_range: - min: 6512640 - max: 215990552 + min: 6336512 + max: 214237680 primary_compute_unit: NPU precision: fp16 layer_info: @@ -217,7 +217,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jep2mzlx5 + job_id: jw56q1j7g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -226,13 +226,13 @@ models: os_name: Android 
manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.534555Z' + timestamp: '2024-06-08T23:24:52Z' - torchscript_onnx_tflite: - inference_time: 3165.0 - throughput: 315.955766192733 + inference_time: 2973.0 + throughput: 336.3605785401951 estimated_peak_memory_range: - min: 2949120 - max: 226145904 + min: 942080 + max: 226696352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -240,14 +240,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jlpevnw75 + job_id: jqp4jwolp job_status: Passed torchscript_onnx_qnn: - inference_time: 2774.0 - throughput: 360.49026676279743 + inference_time: 2764.0 + throughput: 361.794500723589 estimated_peak_memory_range: min: 0 - max: 135656672 + max: 138707216 primary_compute_unit: NPU precision: fp16 layer_info: @@ -255,14 +255,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jz57d8lq5 + job_id: jqpyvdw4p job_status: Passed torchscript_onnx_ort: - inference_time: 4316.0 - throughput: 231.69601482854495 + inference_time: 4502.0 + throughput: 222.1235006663705 estimated_peak_memory_range: - min: 27541504 - max: 88893920 + min: 27127808 + max: 85392304 primary_compute_unit: NPU precision: fp16 layer_info: @@ -270,7 +270,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: j2p0rxl2p + job_id: jwgoev0dp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -279,13 +279,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.534716Z' + timestamp: '2024-06-08T23:24:54Z' - torchscript_onnx_tflite: - inference_time: 3880.0 - throughput: 257.7319587628866 + inference_time: 3909.0 + throughput: 255.81990278843693 estimated_peak_memory_range: - min: 159744 - max: 2928728 + min: 2981888 + max: 5533208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -293,14 +293,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 557 - job_id: jz5w9r3zp + job_id: jo5mvz2q5 job_status: Passed torchscript_onnx_qnn: - inference_time: 3676.0 - throughput: 272.0348204570185 + inference_time: 3717.0 + throughput: 269.03416733925206 estimated_peak_memory_range: - min: 21233664 - max: 48029776 + min: 21213184 + max: 37347800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -308,7 +308,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: jegnew3vg + job_id: jn5q92kmp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -317,13 +317,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.534826Z' + timestamp: '2024-06-08T23:24:50Z' - torchscript_onnx_qnn: - inference_time: 3678.0 - throughput: 271.8868950516585 + inference_time: 3772.0 + throughput: 265.11134676564154 estimated_peak_memory_range: - min: 21233664 - max: 21233664 + min: 21229568 + max: 21229568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -331,14 +331,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 447 - job_id: j0px1z6jg + job_id: j1p8w798p job_status: Passed torchscript_onnx_ort: - inference_time: 4599.0 - throughput: 217.43857360295718 + inference_time: 4450.0 + throughput: 224.7191011235955 estimated_peak_memory_range: - min: 18477056 - max: 18477056 + min: 19857408 + max: 19857408 primary_compute_unit: NPU precision: fp16 layer_info: @@ -346,7 +346,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 462 - job_id: jogkyk3yp + job_id: j7gjklm85 job_status: Passed reference_device_info: name: Snapdragon 
X Elite CRD @@ -355,4 +355,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.534928Z' + timestamp: '2024-06-08T23:24:56Z' diff --git a/qai_hub_models/models/whisper_tiny_en/requirements.txt b/qai_hub_models/models/whisper_tiny_en/requirements.txt index 75b1cf12..fa34d4f8 100644 --- a/qai_hub_models/models/whisper_tiny_en/requirements.txt +++ b/qai_hub_models/models/whisper_tiny_en/requirements.txt @@ -1,2 +1,2 @@ openai-whisper==20230314 -scipy +scipy==1.8.1 diff --git a/qai_hub_models/models/wideresnet50/README.md b/qai_hub_models/models/wideresnet50/README.md index 1fd5bb18..d212e4b8 100644 --- a/qai_hub_models/models/wideresnet50/README.md +++ b/qai_hub_models/models/wideresnet50/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50 can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50/evaluate.py b/qai_hub_models/models/wideresnet50/evaluate.py new file mode 100644 index 00000000..8a6a9482 --- /dev/null +++ b/qai_hub_models/models/wideresnet50/evaluate.py @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.wideresnet50 import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + torch_model = Model.from_pretrained(**get_model_kwargs(Model, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/wideresnet50/export.py b/qai_hub_models/models/wideresnet50/export.py index 5495b5f9..a5bd28dc 100644 --- a/qai_hub_models/models/wideresnet50/export.py +++ b/qai_hub_models/models/wideresnet50/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50/info.yaml b/qai_hub_models/models/wideresnet50/info.yaml index abeab0e0..59d4817f 100644 --- a/qai_hub_models/models/wideresnet50/info.yaml +++ b/qai_hub_models/models/wideresnet50/info.yaml @@ -40,3 +40,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/wideresnet50/perf.yaml b/qai_hub_models/models/wideresnet50/perf.yaml index 75b89214..8a782d43 100644 --- a/qai_hub_models/models/wideresnet50/perf.yaml +++ b/qai_hub_models/models/wideresnet50/perf.yaml @@ -36,11 +36,11 @@ models: - name: WideResNet50 performance_metrics: - torchscript_onnx_tflite: - inference_time: 4883.0 - throughput: 204.7921359819783 + inference_time: 4868.0 + throughput: 205.42317173377157 estimated_peak_memory_range: - min: 20480 - max: 2355000 + min: 24576 + max: 2240024 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jw5610nvp + job_id: jz5wm9vjg job_status: Passed torchscript_onnx_qnn: - inference_time: 5682.0 - throughput: 175.99436818021823 + inference_time: 5652.0 + throughput: 176.92852087756546 estimated_peak_memory_range: min: 622592 - max: 354632496 + max: 250014320 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j1pvwnv7g + job_id: jvgd7v9lg job_status: Passed torchscript_onnx_ort: - inference_time: 5434.0 - throughput: 184.0264998159735 + inference_time: 5471.0 + throughput: 182.78194114421495 estimated_peak_memory_range: - min: 622592 - max: 484949896 + min: 20480 + max: 445804176 primary_compute_unit: NPU precision: fp16 
layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jz5w9rqzp + job_id: jvgd7vleg job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.594875Z' + timestamp: '2024-06-08T23:25:28Z' - torchscript_onnx_tflite: - inference_time: 3633.0 - throughput: 275.2546105147261 + inference_time: 3644.0 + throughput: 274.423710208562 estimated_peak_memory_range: min: 16384 - max: 96201536 + max: 100476704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: j1p3mrexg + job_id: jmg9941vg job_status: Passed torchscript_onnx_qnn: - inference_time: 4161.0 - throughput: 240.3268445085316 + inference_time: 4212.0 + throughput: 237.41690408357076 estimated_peak_memory_range: min: 618496 - max: 54082784 + max: 53808800 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: j7gjl8e7p + job_id: jz5wm9n6g job_status: Passed torchscript_onnx_ort: - inference_time: 4144.0 - throughput: 241.3127413127413 + inference_time: 4064.0 + throughput: 246.06299212598427 estimated_peak_memory_range: - min: 528384 - max: 35985184 + min: 618496 + max: 31598192 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 128 - job_id: jmg94qwq5 + job_id: jz57vd3l5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.594940Z' + timestamp: '2024-06-08T23:25:29Z' - torchscript_onnx_tflite: - inference_time: 4880.0 - throughput: 204.91803278688525 + inference_time: 4872.0 + throughput: 205.2545155993432 estimated_peak_memory_range: - min: 24576 - max: 2303568 + min: 20480 + max: 2441976 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 79 - job_id: jwgov9345 + job_id: jnp1q8llg job_status: Passed torchscript_onnx_qnn: - inference_time: 5681.0 - throughput: 176.0253476500616 + inference_time: 5687.0 + throughput: 175.83963425356075 estimated_peak_memory_range: - min: 16384 - max: 344577184 + min: 618496 + max: 354920904 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jygz70rzp + job_id: jnp1q8x2g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.594981Z' + timestamp: '2024-06-08T23:25:27Z' - torchscript_onnx_qnn: - inference_time: 5868.0 - throughput: 170.41581458759373 + inference_time: 5842.0 + throughput: 171.17425539198905 estimated_peak_memory_range: min: 602112 max: 602112 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 126 - job_id: jlpevnk75 + job_id: jmg994elg job_status: Passed torchscript_onnx_ort: - inference_time: 5093.0 - throughput: 196.34792852935402 + inference_time: 5121.0 + throughput: 195.27436047646944 estimated_peak_memory_range: - min: 49831936 - max: 49831936 + min: 71557120 + max: 71557120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 
layers_on_cpu: 0 total_layers: 128 - job_id: jnp18mekg + job_id: jqp4jw0vp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.595027Z' + timestamp: '2024-06-08T23:25:30Z' diff --git a/qai_hub_models/models/wideresnet50_quantized/README.md b/qai_hub_models/models/wideresnet50_quantized/README.md index 5fd6c471..ee7cb919 100644 --- a/qai_hub_models/models/wideresnet50_quantized/README.md +++ b/qai_hub_models/models/wideresnet50_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of WideResNet50-Quantized can be found [here](https://github.com/pytorch/vision/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Wide Residual Networks](https://arxiv.org/abs/1605.07146) diff --git a/qai_hub_models/models/wideresnet50_quantized/evaluate.py b/qai_hub_models/models/wideresnet50_quantized/evaluate.py new file mode 100644 index 00000000..232037a3 --- /dev/null +++ b/qai_hub_models/models/wideresnet50_quantized/evaluate.py @@ -0,0 +1,62 @@ +# --------------------------------------------------------------------- +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- +# THIS FILE WAS AUTO-GENERATED. DO NOT EDIT MANUALLY. 
+ + +from __future__ import annotations + +import warnings + +import qai_hub as hub + +from qai_hub_models.models.wideresnet50_quantized import MODEL_ID, Model +from qai_hub_models.utils.args import evaluate_parser, get_hub_device, get_model_kwargs +from qai_hub_models.utils.base_model import BaseModel +from qai_hub_models.utils.evaluate import evaluate_on_dataset +from qai_hub_models.utils.inference import compile_model_from_args +from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin + +SUPPORTED_DATASETS = ["imagenette", "imagenet"] + + +def main(): + warnings.filterwarnings("ignore") + parser = evaluate_parser( + model_cls=Model, + default_split_size=2500, + supported_datasets=SUPPORTED_DATASETS, + ) + args = parser.parse_args() + args.device = None + + if args.hub_model_id is not None: + hub_model = hub.get_model(args.hub_model_id) + else: + hub_model = compile_model_from_args( + MODEL_ID, args, get_model_kwargs(Model, vars(args)) + ) + hub_device = get_hub_device(None, args.chipset) + + # Use Fp16 model for torch inference + for cls in Model.__mro__: + if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin): + torch_cls = cls + break + torch_model = torch_cls.from_pretrained(**get_model_kwargs(torch_cls, vars(args))) + evaluate_on_dataset( + hub_model, + torch_model, + hub_device, + args.dataset_name, + args.split_size, + args.num_samples, + args.seed, + args.profile_options, + args.use_cache, + ) + + +if __name__ == "__main__": + main() diff --git a/qai_hub_models/models/wideresnet50_quantized/export.py b/qai_hub_models/models/wideresnet50_quantized/export.py index 26cd34f6..a10d2988 100644 --- a/qai_hub_models/models/wideresnet50_quantized/export.py +++ b/qai_hub_models/models/wideresnet50_quantized/export.py @@ -196,7 +196,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/wideresnet50_quantized/info.yaml b/qai_hub_models/models/wideresnet50_quantized/info.yaml index ec14612f..d6bd4260 100644 --- a/qai_hub_models/models/wideresnet50_quantized/info.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/info.yaml @@ -41,3 +41,4 @@ deploy_license_type: AI Model Hub License dataset: - imagenet-1k - imagenet-22k +labels_file: imagenet_labels.txt diff --git a/qai_hub_models/models/wideresnet50_quantized/perf.yaml b/qai_hub_models/models/wideresnet50_quantized/perf.yaml index 4884a99a..cd023541 100644 --- a/qai_hub_models/models/wideresnet50_quantized/perf.yaml +++ b/qai_hub_models/models/wideresnet50_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: WideResNet50-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1818.0 - throughput: 550.05500550055 + inference_time: 1803.0 + throughput: 554.6311702717693 estimated_peak_memory_range: - min: 24576 - max: 2095776 + min: 12288 + max: 2605960 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jz57d8xq5 + job_id: jo5mvzyw5 job_status: Passed torchscript_onnx_qnn: - inference_time: 2048.0 - throughput: 488.28125 + inference_time: 2049.0 + throughput: 488.0429477794046 estimated_peak_memory_range: min: 16384 - max: 250400872 + max: 124262304 primary_compute_unit: NPU 
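The generated evaluate.py above walks Model.__mro__ to pick the first ancestor that is a BaseModel but not an AIMETQuantizableMixin, so the PyTorch reference runs in FP16 while the Hub model stays quantized. A self-contained sketch of that selection logic with stand-in classes (the real BaseModel and AIMETQuantizableMixin live under qai_hub_models.utils and are not reproduced here):

# Stand-ins for the repository's BaseModel and AIMETQuantizableMixin.
class BaseModel: ...
class AIMETQuantizableMixin: ...

class ExampleModel(BaseModel): ...
class ExampleModelQuantizable(AIMETQuantizableMixin, ExampleModel): ...

def fp16_reference_cls(model_cls: type) -> type:
    # Return the first MRO entry that is a BaseModel but not AIMET-quantizable.
    for cls in model_cls.__mro__:
        if issubclass(cls, BaseModel) and not issubclass(cls, AIMETQuantizableMixin):
            return cls
    raise TypeError(f"{model_cls.__name__} has no non-quantized BaseModel ancestor")

assert fp16_reference_cls(ExampleModelQuantizable) is ExampleModel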
precision: int8 layer_info: @@ -69,22 +69,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jopry7yvg + job_id: j2p0er765 job_status: Passed torchscript_onnx_ort: - inference_time: 2137.0 - throughput: 467.94571829667757 + inference_time: 2037.0 + throughput: 490.9180166912126 estimated_peak_memory_range: min: 12288 - max: 291422160 + max: 210986456 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jogkykyyp + total_layers: 83 + job_id: j1glekr8p job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.629154Z' + timestamp: '2024-06-08T23:27:50Z' - torchscript_onnx_tflite: - inference_time: 1382.0 - throughput: 723.589001447178 + inference_time: 1386.0 + throughput: 721.5007215007215 estimated_peak_memory_range: min: 12288 - max: 54370528 + max: 56539024 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jqp4w2vqg + job_id: jegnre8r5 job_status: Passed torchscript_onnx_qnn: - inference_time: 1538.0 - throughput: 650.1950585175553 + inference_time: 1532.0 + throughput: 652.7415143603133 estimated_peak_memory_range: - min: 167936 - max: 43727936 + min: 172032 + max: 45717904 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,22 +122,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jep2mzmx5 + job_id: j1p8w7vxp job_status: Passed torchscript_onnx_ort: - inference_time: 1619.0 - throughput: 617.6652254478073 + inference_time: 1574.0 + throughput: 635.3240152477764 estimated_peak_memory_range: - min: 618496 - max: 30723328 + min: 12288 + max: 29772112 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jn5q2d275 + total_layers: 83 + job_id: jw56q1l0g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.629211Z' + timestamp: '2024-06-08T23:27:51Z' - torchscript_onnx_tflite: - inference_time: 1829.0 - throughput: 546.7468562055768 + inference_time: 1824.0 + throughput: 548.2456140350877 estimated_peak_memory_range: - min: 12288 - max: 2066200 + min: 24576 + max: 86925416 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: j0px1zyjg + job_id: jopr1yj9g job_status: Passed torchscript_onnx_qnn: - inference_time: 2028.0 - throughput: 493.0966469428008 + inference_time: 2034.0 + throughput: 491.6420845624385 estimated_peak_memory_range: - min: 0 - max: 218893968 + min: 12288 + max: 7539488 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: j2p0rxr2p + job_id: jn5q92o4p job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.629249Z' + timestamp: '2024-06-08T23:27:49Z' - torchscript_onnx_tflite: - inference_time: 8003.0 - throughput: 124.95314257153568 + inference_time: 7862.0 + throughput: 127.1940981938438 estimated_peak_memory_range: - min: 40960 - 
max: 27300016 + min: 12288 + max: 27235632 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jo5mzl3yp - job_status: Passed - torchscript_onnx_qnn: - inference_time: 8575.0 - throughput: 116.61807580174927 - estimated_peak_memory_range: - min: 184320 - max: 42300288 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 78 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 78 - job_id: j1p87k7z5 + job_id: jep23mn4g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.629286Z' + timestamp: '2024-06-08T23:27:44Z' - torchscript_onnx_tflite: - inference_time: 23877.0 - throughput: 41.88130837207354 + inference_time: 23597.0 + throughput: 42.3782684239522 estimated_peak_memory_range: - min: 49152 - max: 1969856 + min: 53248 + max: 3084328 primary_compute_unit: NPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 80 - job_id: jegnewevg + job_id: jqpyvd07p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.629307Z' + timestamp: '2024-06-08T23:27:45Z' - torchscript_onnx_qnn: - inference_time: 1947.0 - throughput: 513.6106831022086 + inference_time: 1964.0 + throughput: 509.1649694501018 estimated_peak_memory_range: - min: 331776 - max: 331776 + min: 368640 + max: 368640 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,22 +244,22 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 78 - job_id: jqpydydrp + job_id: jogkrym25 job_status: Passed torchscript_onnx_ort: - inference_time: 1934.0 - throughput: 517.063081695967 + inference_time: 1848.0 + throughput: 541.1255411255411 estimated_peak_memory_range: - min: 117841920 - max: 117841920 + min: 23400448 + max: 23400448 primary_compute_unit: NPU precision: int8 layer_info: - layers_on_npu: 86 + layers_on_npu: 83 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 86 - job_id: jw56101vp + total_layers: 83 + job_id: j1p3qm2l5 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.629346Z' + timestamp: '2024-06-08T23:27:52Z' diff --git a/qai_hub_models/models/xlsr/README.md b/qai_hub_models/models/xlsr/README.md index 1b462ab6..dc29fc0f 100644 --- a/qai_hub_models/models/xlsr/README.md +++ b/qai_hub_models/models/xlsr/README.md @@ -43,7 +43,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of XLSR can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr/demo.py b/qai_hub_models/models/xlsr/demo.py index 942a23f3..9b01472b 100644 --- a/qai_hub_models/models/xlsr/demo.py +++ b/qai_hub_models/models/xlsr/demo.py @@ -3,16 +3,11 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID, XLSR -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "xlsr_demo.jpg" -) +from qai_hub_models.models.xlsr.model import MODEL_ID, XLSR def main(is_test: bool = False): - super_resolution_demo(XLSR, MODEL_ID, IMAGE_ADDRESS, is_test) + super_resolution_demo(XLSR, MODEL_ID, is_test=is_test) if __name__ == "__main__": diff --git a/qai_hub_models/models/xlsr/export.py b/qai_hub_models/models/xlsr/export.py index 9f2e8c9d..dfc3b401 100644 --- a/qai_hub_models/models/xlsr/export.py +++ b/qai_hub_models/models/xlsr/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/xlsr/info.yaml b/qai_hub_models/models/xlsr/info.yaml index cec3ec6d..b7ff7c32 100644 --- a/qai_hub_models/models/xlsr/info.yaml +++ b/qai_hub_models/models/xlsr/info.yaml @@ -11,13 +11,14 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: - Model checkpoint: xlsr_4x_checkpoint_float32 - Input resolution: 128x128 - Number of parameters: 28.0K - Model size: 116 KB + Model checkpoint: xlsr_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 22.0K + Model size: 92.7 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/xlsr/model.py b/qai_hub_models/models/xlsr/model.py index 5ad0eed8..4c3e804c 100644 --- a/qai_hub_models/models/xlsr/model.py +++ b/qai_hub_models/models/xlsr/model.py @@ -4,86 +4,41 @@ # --------------------------------------------------------------------- from __future__ import annotations -import torch +from pathlib import Path -from qai_hub_models.evaluators.base_evaluators import BaseEvaluator -from qai_hub_models.evaluators.superres_evaluator 
import SuperResolutionOutputEvaluator +from qai_hub_models.models._shared.super_resolution.model import ( + DEFAULT_SCALE_FACTOR, + SuperResolutionModel, + validate_scale_factor, +) from qai_hub_models.utils.aimet.repo import aimet_zoo_as_root -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import BaseModel -from qai_hub_models.utils.input_spec import InputSpec +from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_torch MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 2 -# Weights and config stored in S3 are sourced from -# https://github.com/quic/aimet-model-zoo/blob/develop/aimet_zoo_torch/xlsr/model/model_cards/xlsr_4x_w8a8.json -# https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/xlsr_4x_checkpoint_float32.pth.tar -XLSR_WEIGHTS = "xlsr_4x_checkpoint_float32.pth.tar" -XLSR_SOURCE_REPOSITORY = "https://github.com/quic/aimet-model-zoo" -XLSR_SOURCE_REPO_COMMIT = "d09d2b0404d10f71a7640a87e9d5e5257b028802" -SCALING_FACTOR = 4 +BASE_ASSET_URL = "https://github.com/quic/aimet-model-zoo/releases/download/phase_2_february_artifacts/xlsr_{scale_factor}x_checkpoint_float32.pth.tar" -class XLSR(BaseModel): +class XLSR(SuperResolutionModel): """Exportable XLSR super resolution model, end-to-end.""" - def __init__( - self, - xlsr_model: torch.nn.Module, - ) -> None: - super().__init__() - self.model = xlsr_model - @classmethod - def from_pretrained(cls) -> XLSR: - model = _load_xlsr_source_model() - dst = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, XLSR_WEIGHTS - ).fetch() - checkpoint = torch.load(dst, map_location=torch.device("cpu")) - model.load_state_dict(checkpoint["state_dict"]) - model.eval() - - return cls(model) - - def get_evaluator(self) -> BaseEvaluator: - return SuperResolutionOutputEvaluator() - - def forward(self, image): - """ - Run XLSR on `image`, and produce an upscaled image - - Parameters: - image: Pixel values pre-processed for model consumption. - Range: float[0, 1] - 3-channel Color Space: RGB - - Returns: - image: Pixel values - Range: float[0, 1] - 3-channel Color Space: RGB - """ - return self.model(image) - - @staticmethod - def get_input_spec( - batch_size: int = 1, - num_channels: int = 3, - height: int = 128, - width: int = 128, - ) -> InputSpec: - # Get the input specification ordered (name -> (shape, type)) pairs for this model. - # - # This can be used with the qai_hub python API to declare - # the model input specification upon submitting a profile job. - return {"image": ((batch_size, num_channels, height, width), "float32")} - - -def _load_xlsr_source_model() -> torch.nn.Module: - # Load XLSR model from the source repository using the given weights. - # Returns .utils.super_resolution.models.XLSRRelease - with aimet_zoo_as_root(): - # necessary import. `modeling.deeplab` comes from the XLSR repo. 
- from aimet_zoo_torch.common.super_resolution.models import XLSRRelease - - return XLSRRelease(scaling_factor=SCALING_FACTOR) + def from_pretrained(cls, scale_factor: int = DEFAULT_SCALE_FACTOR) -> XLSR: + validate_scale_factor(scale_factor) + with aimet_zoo_as_root(): + from aimet_zoo_torch.common.super_resolution.models import XLSRRelease + + model = XLSRRelease(scaling_factor=scale_factor) + + url = BASE_ASSET_URL.format(scale_factor=scale_factor) + checkpoint_asset = CachedWebModelAsset( + url, + MODEL_ID, + MODEL_ASSET_VERSION, + Path(url).name, + ) + checkpoint = load_torch(checkpoint_asset) + model.load_state_dict(checkpoint["state_dict"]) + model.eval() + + return cls(model, scale_factor) diff --git a/qai_hub_models/models/xlsr/perf.yaml b/qai_hub_models/models/xlsr/perf.yaml index 90b9cd40..9274c714 100644 --- a/qai_hub_models/models/xlsr/perf.yaml +++ b/qai_hub_models/models/xlsr/perf.yaml @@ -36,11 +36,11 @@ models: - name: XLSR performance_metrics: - torchscript_onnx_tflite: - inference_time: 2487.0 - throughput: 402.09087253719343 + inference_time: 2486.0 + throughput: 402.2526146419952 estimated_peak_memory_range: - min: 16384 - max: 16546256 + min: 32768 + max: 7588944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j1pvwnw7g + job_id: j1gle1wmp job_status: Passed torchscript_onnx_qnn: - inference_time: 1371.0 - throughput: 729.3946024799417 + inference_time: 1374.0 + throughput: 727.802037845706 estimated_peak_memory_range: - min: 217088 - max: 3511184 + min: 24576 + max: 15889328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jygz707zp + job_id: jwgoe4dkp job_status: Passed torchscript_onnx_ort: - inference_time: 1549.0 - throughput: 645.577792123951 + inference_time: 1554.0 + throughput: 643.5006435006435 estimated_peak_memory_range: - min: 12288 - max: 13112960 + min: 221184 + max: 17637032 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jvgdvmvkg + job_id: jygzv4zxp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.672571Z' + timestamp: '2024-06-11T11:59:35Z' - torchscript_onnx_tflite: - inference_time: 1871.0 - throughput: 534.4735435595938 + inference_time: 1792.0 + throughput: 558.0357142857143 estimated_peak_memory_range: min: 16384 - max: 20329008 + max: 20986272 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: j7gjl8l7p + job_id: jw56qdoyg job_status: Passed torchscript_onnx_qnn: - inference_time: 834.0 - throughput: 1199.0407673860911 + inference_time: 840.0 + throughput: 1190.4761904761904 estimated_peak_memory_range: - min: 0 - max: 17215088 + min: 212992 + max: 20099296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jz5w9r9zp + job_id: j1pvz92rg job_status: Passed torchscript_onnx_ort: - inference_time: 995.0 - throughput: 1005.0251256281407 + inference_time: 1035.0 + throughput: 966.1835748792271 estimated_peak_memory_range: - min: 0 - max: 15534560 + min: 212992 + max: 14654368 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 
+131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jz5w9r9jp + job_id: jz5wm1ymg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.672614Z' + timestamp: '2024-06-11T11:59:35Z' - torchscript_onnx_tflite: - inference_time: 2515.0 - throughput: 397.61431411530816 + inference_time: 2862.0 + throughput: 349.4060097833683 estimated_peak_memory_range: - min: 20480 - max: 7451512 + min: 28672 + max: 1426392 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 16 - job_id: jlpevnv75 + job_id: j1p3qwon5 job_status: Passed torchscript_onnx_qnn: inference_time: 1370.0 throughput: 729.92700729927 estimated_peak_memory_range: - min: 28672 - max: 3445512 + min: 217088 + max: 9171344 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jnp18m8kg + job_id: jlpe4l6v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.672640Z' + timestamp: '2024-06-11T11:59:34Z' - torchscript_onnx_qnn: - inference_time: 3622.0 - throughput: 276.09055770292656 + inference_time: 3631.0 + throughput: 275.40622418066647 estimated_peak_memory_range: - min: 212992 - max: 212992 + min: 221184 + max: 221184 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jmg94q4q5 + job_id: j7gjkw3e5 job_status: Passed torchscript_onnx_ort: - inference_time: 1535.0 - throughput: 651.4657980456026 + inference_time: 1489.0 + throughput: 671.591672263264 estimated_peak_memory_range: - min: 8708096 - max: 8708096 + min: 8957952 + max: 8957952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 23 - job_id: jmg94q4v5 + job_id: jnp1qvo7g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.672667Z' + timestamp: '2024-06-11T11:59:36Z' diff --git a/qai_hub_models/models/xlsr/test.py b/qai_hub_models/models/xlsr/test.py index 1ce0cdd8..03726f7e 100644 --- a/qai_hub_models/models/xlsr/test.py +++ b/qai_hub_models/models/xlsr/test.py @@ -5,7 +5,7 @@ import numpy as np from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.xlsr.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS from qai_hub_models.models.xlsr.demo import main as demo_main from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID, XLSR from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image diff --git a/qai_hub_models/models/xlsr_quantized/README.md b/qai_hub_models/models/xlsr_quantized/README.md index bd67774d..483777d1 100644 --- a/qai_hub_models/models/xlsr_quantized/README.md +++ b/qai_hub_models/models/xlsr_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of XLSR-Quantized can be found [here](https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices](https://arxiv.org/abs/2105.10288) diff --git a/qai_hub_models/models/xlsr_quantized/demo.py b/qai_hub_models/models/xlsr_quantized/demo.py index af51277d..3f5096e5 100644 --- a/qai_hub_models/models/xlsr_quantized/demo.py +++ b/qai_hub_models/models/xlsr_quantized/demo.py @@ -3,26 +3,14 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------------- from qai_hub_models.models._shared.super_resolution.demo import super_resolution_demo -from qai_hub_models.models.xlsr_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - XLSRQuantizable, -) -from qai_hub_models.utils.asset_loaders import CachedWebModelAsset -from qai_hub_models.utils.base_model import TargetRuntime - -IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, "xlsr_quantized_demo.jpg" -) +from qai_hub_models.models.xlsr_quantized.model import MODEL_ID, XLSRQuantizable def main(is_test: bool = False): super_resolution_demo( XLSRQuantizable, MODEL_ID, - IMAGE_ADDRESS, - is_test, - available_target_runtimes=[TargetRuntime.TFLITE], + is_test=is_test, ) diff --git a/qai_hub_models/models/xlsr_quantized/export.py b/qai_hub_models/models/xlsr_quantized/export.py index 45d6057b..a0ddab0f 100644 --- a/qai_hub_models/models/xlsr_quantized/export.py +++ b/qai_hub_models/models/xlsr_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/xlsr_quantized/info.yaml b/qai_hub_models/models/xlsr_quantized/info.yaml index 38920617..cadc40fc 100644 --- a/qai_hub_models/models/xlsr_quantized/info.yaml +++ b/qai_hub_models/models/xlsr_quantized/info.yaml @@ -12,13 +12,14 @@ research_paper: https://arxiv.org/abs/2105.10288 research_paper_title: Extremely Lightweight Quantization Robust Real-Time Single-Image Super Resolution for Mobile Devices license: https://github.com/quic/aimet-model-zoo/blob/develop/LICENSE.pdf -deploy_license: https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf +deploy_license: + https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf source_repo: https://github.com/quic/aimet-model-zoo/tree/develop/aimet_zoo_torch/xlsr technical_details: - Model checkpoint: xlsr_4x_checkpoint_w8a8 - Input resolution: 128x128 - Number of parameters: 28.0K - Model size: 47.0 KB + Model checkpoint: xlsr_3x_checkpoint + Input resolution: 640x360 + Number of parameters: 22.0K + Model size: 39.0 KB applicable_scenarios: - Virtual Real Estate Tours - Gaming diff --git a/qai_hub_models/models/xlsr_quantized/model.py 
b/qai_hub_models/models/xlsr_quantized/model.py index 7ff4cd2c..c4115c75 100644 --- a/qai_hub_models/models/xlsr_quantized/model.py +++ b/qai_hub_models/models/xlsr_quantized/model.py @@ -12,12 +12,12 @@ ) # isort: on - import torch from aimet_torch.cross_layer_equalization import equalize_model from aimet_torch.model_preparer import prepare_model from aimet_torch.quantsim import QuantizationSimModel, load_encodings_to_sim +from qai_hub_models.models._shared.super_resolution.model import DEFAULT_SCALE_FACTOR from qai_hub_models.models.xlsr.model import XLSR from qai_hub_models.utils.aimet.config_loader import get_default_aimet_config from qai_hub_models.utils.asset_loaders import CachedWebModelAsset @@ -25,7 +25,6 @@ MODEL_ID = __name__.split(".")[-2] MODEL_ASSET_VERSION = 3 DEFAULT_ENCODINGS = "xlsr_quantized_encodings.json" -SCALING_FACTOR = 4 class XLSRQuantizable(AIMETQuantizableMixin, XLSR): @@ -37,14 +36,16 @@ class XLSRQuantizable(AIMETQuantizableMixin, XLSR): def __init__( self, xlsr_model: QuantizationSimModel, + scale_factor: int, ) -> None: - XLSR.__init__(self, xlsr_model.model) + XLSR.__init__(self, xlsr_model.model, scale_factor) AIMETQuantizableMixin.__init__(self, xlsr_model) @classmethod def from_pretrained( cls, aimet_encodings: str | None = "DEFAULT", + scale_factor: int = DEFAULT_SCALE_FACTOR, ) -> XLSRQuantizable: """ Parameters: @@ -53,7 +54,7 @@ def from_pretrained( elif None: Doesn't load any encodings. Used when computing encodings. else: Interprets as a filepath and loads the encodings stored there. """ - fp16_model = XLSR.from_pretrained() + fp16_model = XLSR.from_pretrained(scale_factor) input_shape = cls.get_input_spec()["image"][0] model = prepare_model(fp16_model) @@ -76,4 +77,4 @@ def from_pretrained( ).fetch() load_encodings_to_sim(sim, aimet_encodings) - return cls(sim) + return cls(sim, scale_factor) diff --git a/qai_hub_models/models/xlsr_quantized/perf.yaml b/qai_hub_models/models/xlsr_quantized/perf.yaml index 1676715b..c06896f0 100644 --- a/qai_hub_models/models/xlsr_quantized/perf.yaml +++ b/qai_hub_models/models/xlsr_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: XLSR-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 1130.0 - throughput: 884.9557522123894 + inference_time: 1141.0 + throughput: 876.4241893076249 estimated_peak_memory_range: - min: 20480 - max: 1508352 + min: 28672 + max: 5356448 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,14 +54,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jqp4w2xlg + job_id: jmg99xomg job_status: Passed torchscript_onnx_qnn: - inference_time: 794.0 - throughput: 1259.4458438287154 + inference_time: 799.0 + throughput: 1251.5644555694619 estimated_peak_memory_range: - min: 12288 - max: 10075288 + min: 16384 + max: 12173096 primary_compute_unit: NPU precision: int8 layer_info: @@ -69,14 +69,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jep2mz7m5 + job_id: j0pxed085 job_status: Passed torchscript_onnx_ort: - inference_time: 1260.0 - throughput: 793.6507936507936 + inference_time: 769.0 + throughput: 1300.3901170351105 estimated_peak_memory_range: - min: 212992 - max: 10412440 + min: 12288 + max: 3749080 primary_compute_unit: NPU precision: int8 layer_info: @@ -84,7 +84,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jn5q2d7m5 + job_id: jep23vo6g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -93,13 +93,13 @@ models: os_name: Android manufacturer: Samsung 
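The xlsr/model.py and xlsr_quantized/model.py hunks above replace the hard-coded 4x checkpoint with a scale_factor argument that is validated and substituted into BASE_ASSET_URL, and XLSRQuantizable.from_pretrained now forwards that same scale_factor. A small sketch of how the checkpoint URL resolves; the scale factors shown are illustrative, since the set accepted by validate_scale_factor is not visible in this patch:

# URL template copied from the xlsr/model.py hunk above (sketch only).
BASE_ASSET_URL = (
    "https://github.com/quic/aimet-model-zoo/releases/download/"
    "phase_2_february_artifacts/xlsr_{scale_factor}x_checkpoint_float32.pth.tar"
)

for scale_factor in (3, 4):  # illustrative values; 4 reproduces the previously hard-coded asset name
    print(BASE_ASSET_URL.format(scale_factor=scale_factor))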
chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.706754Z' + timestamp: '2024-06-11T12:00:03Z' - torchscript_onnx_tflite: - inference_time: 945.0 - throughput: 1058.2010582010582 + inference_time: 943.0 + throughput: 1060.4453870625662 estimated_peak_memory_range: - min: 12288 - max: 20770656 + min: 16384 + max: 21882800 primary_compute_unit: NPU precision: int8 layer_info: @@ -107,14 +107,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: j0px1z79g + job_id: jnp1qvong job_status: Passed torchscript_onnx_qnn: - inference_time: 545.0 - throughput: 1834.8623853211009 + inference_time: 546.0 + throughput: 1831.5018315018315 estimated_peak_memory_range: - min: 61440 - max: 18351280 + min: 65536 + max: 19116992 primary_compute_unit: NPU precision: int8 layer_info: @@ -122,14 +122,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: jqpydy44p + job_id: jo5mvd975 job_status: Passed torchscript_onnx_ort: - inference_time: 850.0 - throughput: 1176.4705882352941 + inference_time: 552.0 + throughput: 1811.5942028985507 estimated_peak_memory_range: - min: 212992 - max: 13825792 + min: 61440 + max: 18287376 primary_compute_unit: NPU precision: int8 layer_info: @@ -137,7 +137,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: j1glkq0lp + job_id: jqpyv780p job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -146,13 +146,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.706797Z' + timestamp: '2024-06-11T12:00:04Z' - torchscript_onnx_tflite: - inference_time: 1131.0 - throughput: 884.1732979664014 + inference_time: 1145.0 + throughput: 873.3624454148471 estimated_peak_memory_range: - min: 12288 - max: 1681000 + min: 106496 + max: 1718744 primary_compute_unit: NPU precision: int8 layer_info: @@ -160,14 +160,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jo5mzlwqp + job_id: jvgd7z66g job_status: Passed torchscript_onnx_qnn: - inference_time: 793.0 - throughput: 1261.034047919294 + inference_time: 807.0 + throughput: 1239.1573729863692 estimated_peak_memory_range: - min: 20480 - max: 15313544 + min: 16384 + max: 17351048 primary_compute_unit: NPU precision: int8 layer_info: @@ -175,7 +175,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j1p87k385 + job_id: jopr1nxkg job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -184,13 +184,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.706823Z' + timestamp: '2024-06-11T12:00:02Z' - torchscript_onnx_tflite: - inference_time: 3650.0 - throughput: 273.972602739726 + inference_time: 2637.0 + throughput: 379.21880925293897 estimated_peak_memory_range: min: 12288 - max: 15804208 + max: 14920896 primary_compute_unit: NPU precision: int8 layer_info: @@ -198,22 +198,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 17 - job_id: jegnew9mg - job_status: Passed - torchscript_onnx_qnn: - inference_time: 1961.0 - throughput: 509.94390617032127 - estimated_peak_memory_range: - min: 61440 - max: 18977360 - primary_compute_unit: NPU - precision: int8 - layer_info: - layers_on_npu: 17 - layers_on_gpu: 0 - layers_on_cpu: 0 - total_layers: 17 - job_id: jogkyklop + job_id: jz57v7on5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -222,13 +207,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.706849Z' + timestamp: 
'2024-06-11T11:59:56Z' - torchscript_onnx_tflite: - inference_time: 14496.0 - throughput: 68.98454746136865 + inference_time: 11523.0 + throughput: 86.78295582747549 estimated_peak_memory_range: - min: 6959104 - max: 13312008 + min: 2777088 + max: 8508512 primary_compute_unit: GPU precision: int8 layer_info: @@ -236,7 +221,7 @@ models: layers_on_gpu: 9 layers_on_cpu: 5 total_layers: 17 - job_id: jopry74eg + job_id: jqp4j9e2p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -245,13 +230,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.706864Z' + timestamp: '2024-06-11T11:59:56Z' - torchscript_onnx_qnn: - inference_time: 951.0 - throughput: 1051.5247108307046 + inference_time: 960.0 + throughput: 1041.6666666666667 estimated_peak_memory_range: - min: 53248 - max: 53248 + min: 57344 + max: 57344 primary_compute_unit: NPU precision: int8 layer_info: @@ -259,14 +244,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 17 - job_id: j2p0rx1ep + job_id: jegnr71j5 job_status: Passed torchscript_onnx_ort: - inference_time: 1145.0 - throughput: 873.3624454148471 + inference_time: 750.0 + throughput: 1333.3333333333333 estimated_peak_memory_range: - min: 8810496 - max: 8810496 + min: 7811072 + max: 7811072 primary_compute_unit: NPU precision: int8 layer_info: @@ -274,7 +259,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 21 - job_id: jw561037p + job_id: j2p0ev905 job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -283,4 +268,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.706892Z' + timestamp: '2024-06-11T12:00:04Z' diff --git a/qai_hub_models/models/xlsr_quantized/test.py b/qai_hub_models/models/xlsr_quantized/test.py index 7ec905dc..609cda51 100644 --- a/qai_hub_models/models/xlsr_quantized/test.py +++ b/qai_hub_models/models/xlsr_quantized/test.py @@ -6,19 +6,15 @@ import torch from qai_hub_models.models._shared.super_resolution.app import SuperResolutionApp -from qai_hub_models.models.xlsr_quantized.demo import IMAGE_ADDRESS +from qai_hub_models.models._shared.super_resolution.demo import IMAGE_ADDRESS +from qai_hub_models.models.xlsr.model import MODEL_ASSET_VERSION, MODEL_ID from qai_hub_models.models.xlsr_quantized.demo import main as demo_main -from qai_hub_models.models.xlsr_quantized.model import ( - MODEL_ASSET_VERSION, - MODEL_ID, - XLSRQuantizable, -) +from qai_hub_models.models.xlsr_quantized.model import XLSRQuantizable from qai_hub_models.utils.asset_loaders import CachedWebModelAsset, load_image -from qai_hub_models.utils.testing import skip_clone_repo_check +from qai_hub_models.utils.testing import assert_most_close, skip_clone_repo_check -OUTPUT_IMAGE_LOCAL_PATH = "xlsr_quantized_demo_output.png" OUTPUT_IMAGE_ADDRESS = CachedWebModelAsset.from_asset_store( - MODEL_ID, MODEL_ASSET_VERSION, OUTPUT_IMAGE_LOCAL_PATH + MODEL_ID, MODEL_ASSET_VERSION, "xlsr_demo_output.png" ) @@ -32,9 +28,10 @@ def test_task(): app = SuperResolutionApp(model=model) app_output_image = app.upscale_image(image)[0] - np.testing.assert_allclose( + assert_most_close( np.asarray(app_output_image, dtype=np.float32) / 255, np.asarray(output_image, dtype=np.float32) / 255, + diff_tol=1e-4, rtol=0.02, atol=0.2, ) diff --git a/qai_hub_models/models/yolonas/README.md b/qai_hub_models/models/yolonas/README.md index 15cb8fb0..d6f5aca9 100644 --- a/qai_hub_models/models/yolonas/README.md +++ b/qai_hub_models/models/yolonas/README.md @@ -48,7 
+48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-NAS can be found [here](https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [YOLO-NAS by Deci Achieves SOTA Performance on Object Detection Using Neural Architecture Search](https://deci.ai/blog/yolo-nas-object-detection-foundation-model/) diff --git a/qai_hub_models/models/yolonas/export.py b/qai_hub_models/models/yolonas/export.py index 15a8dcc5..3edacba9 100644 --- a/qai_hub_models/models/yolonas/export.py +++ b/qai_hub_models/models/yolonas/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolonas/info.yaml b/qai_hub_models/models/yolonas/info.yaml index b2b6b9e2..a6b7b84b 100644 --- a/qai_hub_models/models/yolonas/info.yaml +++ b/qai_hub_models/models/yolonas/info.yaml @@ -38,3 +38,4 @@ license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - COCO +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolonas/perf.yaml b/qai_hub_models/models/yolonas/perf.yaml index c9303e01..4798067d 100644 --- a/qai_hub_models/models/yolonas/perf.yaml +++ b/qai_hub_models/models/yolonas/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-NAS performance_metrics: - torchscript_onnx_tflite: - inference_time: 11709.0 - throughput: 85.40438978563498 + inference_time: 12935.0 + throughput: 77.30962504831851 estimated_peak_memory_range: - min: 53248 - max: 7527368 + min: 245760 + max: 7789312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: jwgov91d5 + job_id: jmg993llg job_status: Passed torchscript_onnx_qnn: - inference_time: 14818.0 - throughput: 67.48549061951681 + inference_time: 14574.0 + throughput: 68.61534239055852 estimated_peak_memory_range: - min: 4919296 - max: 24147176 + min: 5861376 + max: 20985784 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jlpevnr05 + job_id: jz57vjyl5 job_status: Passed torchscript_onnx_ort: - inference_time: 9857.0 - throughput: 101.45074566298062 + inference_time: 9949.0 + throughput: 100.51261433309881 estimated_peak_memory_range: - min: 438272 - max: 60081736 + min: 540672 + max: 61160336 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jnp18mdlg + job_id: jegnr96r5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.750765Z' + timestamp: '2024-06-08T23:29:29Z' - torchscript_onnx_tflite: - inference_time: 8112.0 - throughput: 123.2741617357002 + inference_time: 9036.0 + throughput: 110.66843736166446 
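The info.yaml hunk above adds a labels_file entry (coco_labels.txt) alongside the dataset metadata; the label files themselves are plain text with one class name per line. A generic sketch of reading such a file; the path and helper name are hypothetical, and the repository's own loading helper is not shown in this patch:

# Generic sketch: load a one-class-per-line labels file into a list.
from pathlib import Path

def load_labels(path: str) -> list[str]:
    return [line.strip() for line in Path(path).read_text().splitlines() if line.strip()]

labels = load_labels("coco_labels.txt")  # hypothetical local path to the labels file
print(len(labels), labels[:3])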
estimated_peak_memory_range: - min: 241664 - max: 95426800 + min: 217088 + max: 99001056 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j1pvwn1mg + job_id: jnp1qd42g job_status: Passed torchscript_onnx_qnn: - inference_time: 10117.0 - throughput: 98.84353069091628 + inference_time: 10109.0 + throughput: 98.92175289346127 estimated_peak_memory_range: - min: 4947968 - max: 97898304 + min: 4931584 + max: 92525504 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jygz70x6p + job_id: jqp4jxlvp job_status: Passed torchscript_onnx_ort: - inference_time: 6782.0 - throughput: 147.4491300501327 + inference_time: 6486.0 + throughput: 154.17823003391922 estimated_peak_memory_range: - min: 7028736 - max: 53864960 + min: 4931584 + max: 56975920 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jvgdvmrlg + job_id: jopr14v9g job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.750867Z' + timestamp: '2024-06-08T23:29:30Z' - torchscript_onnx_tflite: - inference_time: 11707.0 - throughput: 85.41898009737764 + inference_time: 12949.0 + throughput: 77.22604062089736 estimated_peak_memory_range: - min: 266240 - max: 4871408 + min: 225280 + max: 7472208 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 201 - job_id: j7gjl808p + job_id: jvgd7rxeg job_status: Passed torchscript_onnx_qnn: - inference_time: 15053.0 - throughput: 66.43194047698134 + inference_time: 15243.0 + throughput: 65.603883749918 estimated_peak_memory_range: - min: 4960256 - max: 20004872 + min: 4952064 + max: 23136736 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jmg94q3v5 + job_id: jo5mvwnw5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,13 +178,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.750934Z' + timestamp: '2024-06-08T23:29:28Z' - torchscript_onnx_qnn: - inference_time: 11914.0 - throughput: 83.9348665435622 + inference_time: 11897.0 + throughput: 84.05480373203329 estimated_peak_memory_range: - min: 4923392 - max: 4923392 + min: 4808704 + max: 4808704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 289 - job_id: jz5w9rdjp + job_id: j0pxe7k15 job_status: Passed torchscript_onnx_ort: - inference_time: 10145.0 - throughput: 98.57072449482504 + inference_time: 10119.0 + throughput: 98.82399446585632 estimated_peak_memory_range: - min: 14712832 - max: 14712832 + min: 5672960 + max: 5672960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jz57d8vr5 + job_id: jep237k4g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.751010Z' + timestamp: '2024-06-08T23:29:31Z' diff --git a/qai_hub_models/models/yolonas/requirements.txt 
b/qai_hub_models/models/yolonas/requirements.txt index b6f0ec66..2466ff49 100644 --- a/qai_hub_models/models/yolonas/requirements.txt +++ b/qai_hub_models/models/yolonas/requirements.txt @@ -7,3 +7,5 @@ einops==0.3.2 Deprecated==1.2.11 data-gradients==0.3.1 shapely==2.0.3 +boto3==1.34.119 +torchmetrics==1.4.0.post0 diff --git a/qai_hub_models/models/yolonas_quantized/README.md b/qai_hub_models/models/yolonas_quantized/README.md index eed10e62..61c64bb3 100644 --- a/qai_hub_models/models/yolonas_quantized/README.md +++ b/qai_hub_models/models/yolonas_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-NAS-Quantized can be found [here](https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://qaihub-public-assets.s3.us-west-2.amazonaws.com/qai-hub-models/Qualcomm+AI+Hub+Proprietary+License.pdf) ## References * [YOLO-NAS by Deci Achieves SOTA Performance on Object Detection Using Neural Architecture Search](https://deci.ai/blog/yolo-nas-object-detection-foundation-model/) diff --git a/qai_hub_models/models/yolonas_quantized/export.py b/qai_hub_models/models/yolonas_quantized/export.py index eadf61e9..86a7b17f 100644 --- a/qai_hub_models/models/yolonas_quantized/export.py +++ b/qai_hub_models/models/yolonas_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolonas_quantized/info.yaml b/qai_hub_models/models/yolonas_quantized/info.yaml index d3a0e9d4..3c91dd39 100644 --- a/qai_hub_models/models/yolonas_quantized/info.yaml +++ b/qai_hub_models/models/yolonas_quantized/info.yaml @@ -40,3 +40,4 @@ license_type: apache-2.0 deploy_license_type: AI Model Hub License dataset: - COCO +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolonas_quantized/perf.yaml b/qai_hub_models/models/yolonas_quantized/perf.yaml index 64592233..00f23b93 100644 --- a/qai_hub_models/models/yolonas_quantized/perf.yaml +++ b/qai_hub_models/models/yolonas_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Yolo-NAS-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 6971.0 - throughput: 143.45144168698894 + inference_time: 6973.0 + throughput: 143.41029685931449 estimated_peak_memory_range: - min: 9261056 - max: 12251360 + min: 10432512 + max: 13902448 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: j0px1ze9g + job_id: j2p0e1z65 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.785555Z' + timestamp: '2024-06-08T23:30:46Z' - torchscript_onnx_tflite: - inference_time: 5192.0 - throughput: 192.6040061633282 + inference_time: 5003.0 + throughput: 199.8800719568259 estimated_peak_memory_range: - min: 913408 - max: 62958576 + min: 356352 + max: 64309792 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jo5mzlvqp + job_id: j1p8w3qxp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.785595Z' + timestamp: '2024-06-08T23:30:47Z' - torchscript_onnx_tflite: - inference_time: 6975.0 - throughput: 143.36917562724014 + inference_time: 6937.0 + throughput: 144.15453366008362 estimated_peak_memory_range: - min: 10153984 - max: 13662784 + min: 10485760 + max: 42442768 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jegnewrmg + job_id: jogkrle25 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.785630Z' + timestamp: '2024-06-08T23:30:48Z' - torchscript_onnx_tflite: - inference_time: 22146.0 - throughput: 45.154881242662334 + inference_time: 23899.0 + throughput: 41.84275492698439 estimated_peak_memory_range: - min: 200704 - max: 52808240 + min: 765952 + max: 56795680 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 3 total_layers: 203 - job_id: jopry71eg + job_id: jn5q9764p job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.785665Z' + timestamp: '2024-06-08T23:30:49Z' - torchscript_onnx_tflite: - inference_time: 115607.0 - throughput: 8.649995242502616 + inference_time: 131373.0 + throughput: 7.611914168055841 estimated_peak_memory_range: - min: 44011520 - max: 52977680 + min: 15310848 + max: 24594432 primary_compute_unit: CPU precision: fp32 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 203 total_layers: 203 - job_id: jep2mz3m5 + job_id: j1gle0v8p job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.785698Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.785705Z' + timestamp: '2024-06-08T23:30:50Z' diff --git a/qai_hub_models/models/yolonas_quantized/requirements.txt b/qai_hub_models/models/yolonas_quantized/requirements.txt index 930698bd..4904c5ac 100644 --- a/qai_hub_models/models/yolonas_quantized/requirements.txt +++ b/qai_hub_models/models/yolonas_quantized/requirements.txt @@ -8,3 +8,5 @@ einops==0.3.2 Deprecated==1.2.11 data-gradients==0.3.1 shapely==2.0.3 +boto3==1.34.119 +torchmetrics==1.4.0.post0 diff --git a/qai_hub_models/models/yolov6/README.md b/qai_hub_models/models/yolov6/README.md index d3d4f458..97f127f0 100644 --- a/qai_hub_models/models/yolov6/README.md +++ b/qai_hub_models/models/yolov6/README.md @@ -43,7 +43,7 @@ script requires access to 
Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v6 can be found [here](https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/meituan/YOLOv6/blob/47625514e7480706a46ff3c0cd0252907ac12f22/LICENSE) ## References * [YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications](https://arxiv.org/abs/2209.02976) diff --git a/qai_hub_models/models/yolov6/export.py b/qai_hub_models/models/yolov6/export.py index 9895e986..1b9b17c2 100644 --- a/qai_hub_models/models/yolov6/export.py +++ b/qai_hub_models/models/yolov6/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov6/info.yaml b/qai_hub_models/models/yolov6/info.yaml index 419842fd..e9dda573 100644 --- a/qai_hub_models/models/yolov6/info.yaml +++ b/qai_hub_models/models/yolov6/info.yaml @@ -39,3 +39,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov6/perf.yaml b/qai_hub_models/models/yolov6/perf.yaml index 6a28fd5e..896ae16b 100644 --- a/qai_hub_models/models/yolov6/perf.yaml +++ b/qai_hub_models/models/yolov6/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-v6 performance_metrics: - torchscript_onnx_tflite: - inference_time: 6721.0 - throughput: 148.78738282993604 + inference_time: 7424.0 + throughput: 134.69827586206895 estimated_peak_memory_range: min: 12288 - max: 3249784 + max: 3603960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: jwgov9ed5 + job_id: jz5wmdw6g job_status: Passed torchscript_onnx_qnn: - inference_time: 5377.0 - throughput: 185.97731076808628 + inference_time: 5369.0 + throughput: 186.25442354255912 estimated_peak_memory_range: - min: 4210688 - max: 16028088 + min: 4968448 + max: 16471240 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jlpevn405 + job_id: jvgd7rneg job_status: Passed torchscript_onnx_ort: - inference_time: 6458.0 - throughput: 154.8467017652524 + inference_time: 7761.0 + throughput: 128.84937508053085 estimated_peak_memory_range: - min: 5177344 - max: 34730048 + min: 5341184 + max: 35743744 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jnp18mqlg + job_id: jo5mvwew5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.810603Z' + timestamp: '2024-06-08T23:31:24Z' - torchscript_onnx_tflite: - inference_time: 4753.0 - throughput: 210.39343572480539 + inference_time: 5294.0 + throughput: 188.89308651303364 estimated_peak_memory_range: - min: 20480 - max: 78753056 + min: 40960 + max: 79662544 primary_compute_unit: NPU 
precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j1pvwnzmg + job_id: jmg9930lg job_status: Passed torchscript_onnx_qnn: - inference_time: 3870.0 - throughput: 258.3979328165375 + inference_time: 3862.0 + throughput: 258.9331952356292 estimated_peak_memory_range: min: 4931584 - max: 92796272 + max: 95031952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jygz70v6p + job_id: jz57vj2l5 job_status: Passed torchscript_onnx_ort: - inference_time: 4911.0 - throughput: 203.62451639177357 + inference_time: 5600.0 + throughput: 178.57142857142858 estimated_peak_memory_range: - min: 4931584 - max: 65416976 + min: 835584 + max: 60500960 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jvgdvm7lg + job_id: jegnr90r5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.810694Z' + timestamp: '2024-06-08T23:31:25Z' - torchscript_onnx_tflite: - inference_time: 6718.0 - throughput: 148.85382554331647 + inference_time: 7339.0 + throughput: 136.2583458236817 estimated_peak_memory_range: - min: 229376 - max: 21141768 + min: 45056 + max: 9009312 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 182 - job_id: j7gjl8k8p + job_id: jnp1qd22g job_status: Passed torchscript_onnx_qnn: - inference_time: 5351.0 - throughput: 186.88095683049897 + inference_time: 5384.0 + throughput: 185.73551263001485 estimated_peak_memory_range: - min: 4956160 - max: 16811712 + min: 4939776 + max: 16906872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jmg94q9v5 + job_id: j0pxe7915 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.810752Z' + timestamp: '2024-06-08T23:31:23Z' - torchscript_onnx_qnn: - inference_time: 6760.0 - throughput: 147.92899408284023 + inference_time: 6812.0 + throughput: 146.7997651203758 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz5w9rmjp + job_id: jqp4jxnvp job_status: Passed torchscript_onnx_ort: - inference_time: 6592.0 - throughput: 151.6990291262136 + inference_time: 6530.0 + throughput: 153.1393568147014 estimated_peak_memory_range: - min: 10207232 - max: 10207232 + min: 3538944 + max: 3538944 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 228 - job_id: jz57d86r5 + job_id: jopr1469g job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.810815Z' + timestamp: '2024-06-08T23:31:26Z' diff --git a/qai_hub_models/models/yolov7/README.md b/qai_hub_models/models/yolov7/README.md index e6ab3b03..d6bbd49d 100644 --- a/qai_hub_models/models/yolov7/README.md +++ b/qai_hub_models/models/yolov7/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for 
Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v7 can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md) ## References * [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696) diff --git a/qai_hub_models/models/yolov7/export.py b/qai_hub_models/models/yolov7/export.py index a794246c..ecc0f421 100644 --- a/qai_hub_models/models/yolov7/export.py +++ b/qai_hub_models/models/yolov7/export.py @@ -186,7 +186,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -217,7 +217,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7/info.yaml b/qai_hub_models/models/yolov7/info.yaml index 88298456..1e7dcaeb 100644 --- a/qai_hub_models/models/yolov7/info.yaml +++ b/qai_hub_models/models/yolov7/info.yaml @@ -37,3 +37,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov7/perf.yaml b/qai_hub_models/models/yolov7/perf.yaml index 3ea222f6..4a8b5cd0 100644 --- a/qai_hub_models/models/yolov7/perf.yaml +++ b/qai_hub_models/models/yolov7/perf.yaml @@ -36,11 +36,11 @@ models: - name: Yolo-v7 performance_metrics: - torchscript_onnx_tflite: - inference_time: 15910.0 - throughput: 62.853551225644246 + inference_time: 15912.0 + throughput: 62.845651080945196 estimated_peak_memory_range: - min: 659456 - max: 2744520 + min: 36864 + max: 24453640 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: j0px1zm9g + job_id: jqpyv4z7p job_status: Passed torchscript_onnx_ort: - inference_time: 13428.0 - throughput: 74.47125409591898 + inference_time: 13978.0 + throughput: 71.5409929889827 estimated_peak_memory_range: - min: 2269184 - max: 36609320 + min: 1499136 + max: 35988136 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: j2p0rxkep + job_id: jw56q320g job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.845281Z' + timestamp: '2024-06-08T23:31:53Z' - torchscript_onnx_tflite: - inference_time: 10854.0 - throughput: 92.13193292795283 + inference_time: 10805.0 + throughput: 92.5497454881999 estimated_peak_memory_range: - min: 32768 - max: 59666608 + min: 1200128 + max: 65074384 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: jo5mzl4qp + job_id: j2p0e1465 job_status: Passed 
torchscript_onnx_ort: - inference_time: 9618.0 - throughput: 103.97171969224371 + inference_time: 8800.0 + throughput: 113.63636363636364 estimated_peak_memory_range: - min: 7049216 - max: 68347616 + min: 7557120 + max: 68407936 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: j1p87k885 + job_id: j1p3q4nl5 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.845348Z' + timestamp: '2024-06-08T23:31:54Z' - torchscript_onnx_tflite: - inference_time: 15980.0 - throughput: 62.57822277847309 + inference_time: 15993.0 + throughput: 62.52735571812668 estimated_peak_memory_range: - min: 1228800 - max: 3705800 + min: 1232896 + max: 3455120 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 215 - job_id: jegnewxmg + job_id: j1p8w32xp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.845383Z' + timestamp: '2024-06-08T23:31:48Z' - torchscript_onnx_ort: - inference_time: 13426.0 - throughput: 74.48234768359899 + inference_time: 13386.0 + throughput: 74.70491558344538 estimated_peak_memory_range: - min: 4927488 - max: 4927488 + min: 4964352 + max: 4964352 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 12 total_layers: 225 - job_id: jogkykdop + job_id: jwgoe1zxp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.845418Z' + timestamp: '2024-06-08T23:31:55Z' diff --git a/qai_hub_models/models/yolov7_quantized/README.md b/qai_hub_models/models/yolov7_quantized/README.md index 390b486d..2535d8d0 100644 --- a/qai_hub_models/models/yolov7_quantized/README.md +++ b/qai_hub_models/models/yolov7_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of Yolo-v7-Quantized can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/WongKinYiu/yolov7/blob/main/LICENSE.md) ## References * [YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors](https://arxiv.org/abs/2207.02696) diff --git a/qai_hub_models/models/yolov7_quantized/export.py b/qai_hub_models/models/yolov7_quantized/export.py index 97078f0f..a8d2b1bc 100644 --- a/qai_hub_models/models/yolov7_quantized/export.py +++ b/qai_hub_models/models/yolov7_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov7_quantized/info.yaml b/qai_hub_models/models/yolov7_quantized/info.yaml index 9ce2d281..9799b03c 100644 --- a/qai_hub_models/models/yolov7_quantized/info.yaml +++ b/qai_hub_models/models/yolov7_quantized/info.yaml @@ -40,3 +40,4 @@ has_animated_banner: yes license_type: gpl-3.0 deploy_license_type: gpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov7_quantized/perf.yaml b/qai_hub_models/models/yolov7_quantized/perf.yaml index 34341b33..765fdc6d 100644 --- a/qai_hub_models/models/yolov7_quantized/perf.yaml +++ b/qai_hub_models/models/yolov7_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: Yolo-v7-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 4610.0 - throughput: 216.91973969631238 + inference_time: 4596.0 + throughput: 217.58050478677112 estimated_peak_memory_range: - min: 319488 - max: 4163328 + min: 311296 + max: 2244624 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jw5610v7p + job_id: j7gjk0dx5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.871316Z' + timestamp: '2024-06-08T23:32:25Z' - torchscript_onnx_tflite: - inference_time: 2954.0 - throughput: 338.52403520649966 + inference_time: 2999.0 + throughput: 333.4444814938313 estimated_peak_memory_range: - min: 12288 - max: 59764048 + min: 32768 + max: 61022912 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jwgov9md5 + job_id: jlpe4ro15 job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.871356Z' + timestamp: '2024-06-08T23:32:26Z' - torchscript_onnx_tflite: - inference_time: 4573.0 - throughput: 218.67483052700635 + inference_time: 4588.0 + throughput: 217.9598953792502 estimated_peak_memory_range: - min: 278528 - max: 2823912 + 
min: 299008 + max: 3108488 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: j1pvwn4mg + job_id: jygzvx2kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.871392Z' + timestamp: '2024-06-08T23:32:27Z' - torchscript_onnx_tflite: - inference_time: 10865.0 - throughput: 92.03865623561896 + inference_time: 10699.0 + throughput: 93.46667912889055 estimated_peak_memory_range: min: 266240 - max: 55222224 + max: 56452384 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 226 - job_id: jlpevn205 + job_id: jz5wmd26g job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.871427Z' + timestamp: '2024-06-08T23:32:29Z' - torchscript_onnx_tflite: - inference_time: 92308.0 - throughput: 10.833297222342592 + inference_time: 93320.0 + throughput: 10.715816545220745 estimated_peak_memory_range: - min: 8777728 - max: 13117240 + min: 8769536 + max: 46392104 primary_compute_unit: GPU precision: int8 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 126 layers_on_cpu: 68 total_layers: 226 - job_id: jygz70w6p + job_id: jmg993jlg job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.871461Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.871468Z' + timestamp: '2024-06-08T23:32:29Z' diff --git a/qai_hub_models/models/yolov8_det/README.md b/qai_hub_models/models/yolov8_det/README.md index c82afce9..aa52c80d 100644 --- a/qai_hub_models/models/yolov8_det/README.md +++ b/qai_hub_models/models/yolov8_det/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of YOLOv8-Detection can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Object Detection](https://docs.ultralytics.com/tasks/detect/) diff --git a/qai_hub_models/models/yolov8_det/export.py b/qai_hub_models/models/yolov8_det/export.py index 222ace93..b8418123 100644 --- a/qai_hub_models/models/yolov8_det/export.py +++ b/qai_hub_models/models/yolov8_det/export.py @@ -188,7 +188,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) diff --git a/qai_hub_models/models/yolov8_det/info.yaml b/qai_hub_models/models/yolov8_det/info.yaml index 2ede9f32..303126cd 100644 --- a/qai_hub_models/models/yolov8_det/info.yaml +++ b/qai_hub_models/models/yolov8_det/info.yaml @@ -37,3 +37,4 @@ has_animated_banner: yes license_type: agpl-3.0 deploy_license_type: agpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov8_det/perf.yaml b/qai_hub_models/models/yolov8_det/perf.yaml index 6147efd8..79f7e29d 100644 --- a/qai_hub_models/models/yolov8_det/perf.yaml +++ b/qai_hub_models/models/yolov8_det/perf.yaml @@ -36,11 +36,11 @@ models: - name: YOLOv8-Detection performance_metrics: - torchscript_onnx_tflite: - inference_time: 5881.0 - throughput: 170.03910899506886 + inference_time: 5900.0 + throughput: 169.4915254237288 estimated_peak_memory_range: - min: 249856 - max: 3086376 + min: 40960 + max: 11760568 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jegnewomg + job_id: j0pxe7n35 job_status: Passed torchscript_onnx_qnn: - inference_time: 5194.0 - throughput: 192.52984212552946 + inference_time: 5248.0 + throughput: 190.5487804878049 estimated_peak_memory_range: - min: 4935680 - max: 19454832 + min: 4919296 + max: 17813040 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,14 +63,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jqpydyq4p + job_id: jep2370rg job_status: Passed torchscript_onnx_ort: - inference_time: 6201.0 - throughput: 161.26431220770843 + inference_time: 6498.0 + throughput: 153.8935056940597 estimated_peak_memory_range: - min: 4210688 - max: 36287088 + min: 8409088 + max: 39812256 primary_compute_unit: NPU precision: fp16 layer_info: @@ -78,7 +78,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jn5q2dzm5 + job_id: jogkrl7w5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -87,13 +87,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.896469Z' + timestamp: '2024-06-08T23:33:07Z' - torchscript_onnx_tflite: - inference_time: 4124.0 - throughput: 242.48302618816683 + inference_time: 4177.0 + throughput: 239.40627244433804 estimated_peak_memory_range: - min: 49152 - max: 86003712 + min: 16384 + max: 87350704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,14 +101,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jopry7oeg + job_id: jo5mvwqd5 job_status: Passed torchscript_onnx_qnn: - 
inference_time: 3684.0 - throughput: 271.4440825190011 + inference_time: 3699.0 + throughput: 270.3433360367667 estimated_peak_memory_range: min: 4931584 - max: 105033424 + max: 104903584 primary_compute_unit: NPU precision: fp16 layer_info: @@ -116,14 +116,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j2p0rxdep + job_id: jqpyv4r8p job_status: Passed torchscript_onnx_ort: - inference_time: 4667.0 - throughput: 214.27040925648168 + inference_time: 4564.0 + throughput: 219.10604732690624 estimated_peak_memory_range: - min: 4222976 - max: 68052736 + min: 7028736 + max: 68265872 primary_compute_unit: NPU precision: fp16 layer_info: @@ -131,7 +131,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: j1glkqolp + job_id: jn5q97enp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -140,13 +140,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.896588Z' + timestamp: '2024-06-08T23:33:08Z' - torchscript_onnx_tflite: - inference_time: 5889.0 - throughput: 169.80811682798438 + inference_time: 5907.0 + throughput: 169.29067208396816 estimated_peak_memory_range: - min: 258048 - max: 2622624 + min: 245760 + max: 2242704 primary_compute_unit: NPU precision: fp16 layer_info: @@ -154,14 +154,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 290 - job_id: jep2mz4m5 + job_id: jopr1480g job_status: Passed torchscript_onnx_qnn: - inference_time: 5214.0 - throughput: 191.79133103183736 + inference_time: 5193.0 + throughput: 192.56691700365877 estimated_peak_memory_range: - min: 4931584 - max: 19569632 + min: 4947968 + max: 19559888 primary_compute_unit: NPU precision: fp16 layer_info: @@ -169,7 +169,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: jogkykoop + job_id: j1p8w30kp job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -178,10 +178,10 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.896665Z' + timestamp: '2024-06-08T23:33:06Z' - torchscript_onnx_qnn: - inference_time: 5796.0 - throughput: 172.5327812284334 + inference_time: 5771.0 + throughput: 173.28019407381737 estimated_peak_memory_range: min: 4923392 max: 4923392 @@ -192,14 +192,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 285 - job_id: j1p87k685 + job_id: j2p0e1395 job_status: Passed torchscript_onnx_ort: - inference_time: 6424.0 - throughput: 155.6662515566625 + inference_time: 6381.0 + throughput: 156.7152483936687 estimated_peak_memory_range: - min: 8925184 - max: 8925184 + min: 10723328 + max: 10723328 primary_compute_unit: NPU precision: fp16 layer_info: @@ -207,7 +207,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 286 - job_id: jw5610r7p + job_id: j1gle06jp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -216,4 +216,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.896740Z' + timestamp: '2024-06-08T23:33:09Z' diff --git a/qai_hub_models/models/yolov8_det_quantized/README.md b/qai_hub_models/models/yolov8_det_quantized/README.md index 874a00c7..75da973b 100644 --- a/qai_hub_models/models/yolov8_det_quantized/README.md +++ b/qai_hub_models/models/yolov8_det_quantized/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. 
## License - The license for the original implementation of YOLOv8-Detection-Quantized can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). -- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Object Detection](https://docs.ultralytics.com/tasks/detect/) diff --git a/qai_hub_models/models/yolov8_det_quantized/export.py b/qai_hub_models/models/yolov8_det_quantized/export.py index 8d2d1fa1..1c3d53f0 100644 --- a/qai_hub_models/models/yolov8_det_quantized/export.py +++ b/qai_hub_models/models/yolov8_det_quantized/export.py @@ -194,7 +194,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -225,7 +225,12 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False, supports_ort=False) + parser = export_parser( + model_cls=Model, + supports_qnn=False, + supports_ort=False, + supports_precompiled_ort=False, + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_det_quantized/info.yaml b/qai_hub_models/models/yolov8_det_quantized/info.yaml index 09e86fec..5751179f 100644 --- a/qai_hub_models/models/yolov8_det_quantized/info.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/info.yaml @@ -40,3 +40,4 @@ has_animated_banner: yes license_type: agpl-3.0 deploy_license_type: agpl-3.0 dataset: [] +labels_file: coco_labels.txt diff --git a/qai_hub_models/models/yolov8_det_quantized/perf.yaml b/qai_hub_models/models/yolov8_det_quantized/perf.yaml index ef54d1c0..9271d6d2 100644 --- a/qai_hub_models/models/yolov8_det_quantized/perf.yaml +++ b/qai_hub_models/models/yolov8_det_quantized/perf.yaml @@ -42,11 +42,11 @@ models: - name: YOLOv8-Detection-Quantized performance_metrics: - torchscript_onnx_tflite: - inference_time: 2342.0 - throughput: 426.9854824935952 + inference_time: 2332.0 + throughput: 428.8164665523156 estimated_peak_memory_range: min: 12288 - max: 2396192 + max: 3599048 primary_compute_unit: NPU precision: int8 layer_info: @@ -54,7 +54,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: jwgov9od5 + job_id: jwgoe1kqp job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -63,13 +63,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.931301Z' + timestamp: '2024-06-08T23:33:51Z' - torchscript_onnx_tflite: - inference_time: 1597.0 - throughput: 626.1740763932373 + inference_time: 1594.0 + throughput: 627.3525721455458 estimated_peak_memory_range: min: 12288 - max: 48933824 + max: 49918192 primary_compute_unit: NPU precision: int8 layer_info: @@ -77,7 +77,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j1pvwnemg + job_id: j1pvz1rkg job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -86,13 +86,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.931347Z' + timestamp: '2024-06-08T23:33:52Z' - torchscript_onnx_tflite: - inference_time: 
2340.0 - throughput: 427.35042735042737 + inference_time: 2326.0 + throughput: 429.9226139294927 estimated_peak_memory_range: - min: 16384 - max: 1938968 + min: 12288 + max: 2668824 primary_compute_unit: NPU precision: int8 layer_info: @@ -100,7 +100,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 1 total_layers: 277 - job_id: j7gjl8o8p + job_id: j7gjk02v5 job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -109,13 +109,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.931388Z' + timestamp: '2024-06-08T23:33:53Z' - torchscript_onnx_tflite: - inference_time: 7122.0 - throughput: 140.40999719180004 + inference_time: 6463.0 + throughput: 154.7269070091289 estimated_peak_memory_range: - min: 77824 - max: 34152912 + min: 81920 + max: 33931536 primary_compute_unit: NPU precision: int8 layer_info: @@ -123,7 +123,7 @@ models: layers_on_gpu: 1 layers_on_cpu: 1 total_layers: 277 - job_id: jlpevn805 + job_id: jlpe4rwo5 job_status: Passed reference_device_info: name: RB3 Gen 2 (Proxy) @@ -132,13 +132,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs6490 - timestamp: '2024-05-29T18:59:43.931428Z' + timestamp: '2024-06-08T23:33:54Z' - torchscript_onnx_tflite: - inference_time: 46687.0 - throughput: 21.419238760254462 + inference_time: 46343.0 + throughput: 21.57823187967978 estimated_peak_memory_range: - min: 2846720 - max: 17832816 + min: 1802240 + max: 10846104 primary_compute_unit: NPU precision: int8 layer_info: @@ -146,7 +146,7 @@ models: layers_on_gpu: 2 layers_on_cpu: 1 total_layers: 277 - job_id: jygz7086p + job_id: jygzvxjop job_status: Passed reference_device_info: name: RB5 (Proxy) @@ -155,12 +155,4 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8250 - timestamp: '2024-05-29T18:59:43.931467Z' - - reference_device_info: - name: Snapdragon X Elite CRD - os: '11' - form_factor: Compute - os_name: Windows - manufacturer: Qualcomm - chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.931474Z' + timestamp: '2024-06-08T23:33:55Z' diff --git a/qai_hub_models/models/yolov8_seg/README.md b/qai_hub_models/models/yolov8_seg/README.md index 518fab1f..75df2424 100644 --- a/qai_hub_models/models/yolov8_seg/README.md +++ b/qai_hub_models/models/yolov8_seg/README.md @@ -48,7 +48,7 @@ script requires access to Deployment instructions for Qualcomm® AI Hub. ## License - The license for the original implementation of YOLOv8-Segmentation can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). 
-- The license for the compiled assets for on-device deployment can be found [here]({deploy_license_url}) +- The license for the compiled assets for on-device deployment can be found [here](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) ## References * [Ultralytics YOLOv8 Docs: Instance Segmentation](https://docs.ultralytics.com/tasks/segment/) diff --git a/qai_hub_models/models/yolov8_seg/export.py b/qai_hub_models/models/yolov8_seg/export.py index 4156e8c5..d2ecb2c9 100644 --- a/qai_hub_models/models/yolov8_seg/export.py +++ b/qai_hub_models/models/yolov8_seg/export.py @@ -33,6 +33,7 @@ can_access_qualcomm_ai_hub, export_without_hub_access, transpose_channel_first_to_last, + transpose_channel_last_to_first, ) @@ -123,7 +124,7 @@ def export_model( # Convert outputs from channel last to channel first (preferred I/O format for QNN and TensorFlow Lite) channel_last_flags = ( - " --force_channel_last_input image" + " --force_channel_last_input image" + " --force_channel_last_output output_4" if target_runtime != TargetRuntime.ORT else "" ) @@ -188,7 +189,7 @@ def export_model( target_runtime_extension = "so" elif target_runtime == TargetRuntime.TFLITE: target_runtime_extension = "tflite" - elif target_runtime == TargetRuntime.ORT: + elif target_runtime in {TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT}: target_runtime_extension = "onnx" os.makedirs(output_path, exist_ok=True) @@ -207,6 +208,14 @@ def export_model( torch_out = torch_inference(model, sample_inputs) assert inference_job is not None and inference_job.wait().success inference_result: hub.client.DatasetEntries = inference_job.download_output_data() # type: ignore + # Convert outputs from channel last to channel first + inference_result = ( + inference_result + if target_runtime == TargetRuntime.ORT + else transpose_channel_last_to_first( + "output_4", inference_result, target_runtime + ) + ) print_inference_metrics( inference_job, inference_result, torch_out, outputs_to_skip=[3] ) @@ -219,7 +228,9 @@ def export_model( def main(): warnings.filterwarnings("ignore") - parser = export_parser(model_cls=Model, supports_qnn=False) + parser = export_parser( + model_cls=Model, supports_qnn=False, supports_precompiled_ort=False + ) args = parser.parse_args() export_model(**vars(args)) diff --git a/qai_hub_models/models/yolov8_seg/perf.yaml b/qai_hub_models/models/yolov8_seg/perf.yaml index 5b299571..b39496c1 100644 --- a/qai_hub_models/models/yolov8_seg/perf.yaml +++ b/qai_hub_models/models/yolov8_seg/perf.yaml @@ -36,11 +36,11 @@ models: - name: YOLOv8-Segmentation performance_metrics: - torchscript_onnx_tflite: - inference_time: 7371.0 - throughput: 135.666802333469 + inference_time: 7329.0 + throughput: 136.4442625187611 estimated_peak_memory_range: min: 4210688 - max: 6944112 + max: 6975488 primary_compute_unit: NPU precision: fp16 layer_info: @@ -48,14 +48,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: j0px1zd1g + job_id: jo5mvw6d5 job_status: Passed torchscript_onnx_ort: - inference_time: 7864.0 - throughput: 127.1617497456765 + inference_time: 7942.0 + throughput: 125.91286829513976 estimated_peak_memory_range: - min: 15581184 - max: 42389248 + min: 14696448 + max: 42029952 primary_compute_unit: NPU precision: fp16 layer_info: @@ -63,7 +63,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: j1p87k4x5 + job_id: jogkrlqw5 job_status: Passed reference_device_info: name: Samsung Galaxy S23 @@ -72,13 +72,13 @@ models: os_name: Android manufacturer: Samsung chipset: 
Snapdragon® 8 Gen 2 - timestamp: '2024-05-29T18:59:43.956437Z' + timestamp: '2024-06-08T23:34:30Z' - torchscript_onnx_tflite: - inference_time: 5327.0 - throughput: 187.72292096865027 + inference_time: 5452.0 + throughput: 183.41892883345562 estimated_peak_memory_range: - min: 16384 - max: 94579136 + min: 3268608 + max: 101106816 primary_compute_unit: NPU precision: fp16 layer_info: @@ -86,14 +86,14 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jo5mzldwp + job_id: jegnr9mk5 job_status: Passed torchscript_onnx_ort: - inference_time: 5666.0 - throughput: 176.49135192375573 + inference_time: 5339.0 + throughput: 187.30099269526127 estimated_peak_memory_range: - min: 18112512 - max: 81621696 + min: 16973824 + max: 81417296 primary_compute_unit: NPU precision: fp16 layer_info: @@ -101,7 +101,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jogkyk92p + job_id: jn5q97rnp job_status: Passed reference_device_info: name: Samsung Galaxy S24 @@ -110,13 +110,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-29T18:59:43.956525Z' + timestamp: '2024-06-08T23:34:31Z' - torchscript_onnx_tflite: - inference_time: 7372.0 - throughput: 135.6483993488877 + inference_time: 7404.0 + throughput: 135.06212857914642 estimated_peak_memory_range: - min: 4579328 - max: 14344968 + min: 4583424 + max: 7403760 primary_compute_unit: NPU precision: fp16 layer_info: @@ -124,7 +124,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 337 - job_id: jegnew7rg + job_id: jopr1420g job_status: Passed reference_device_info: name: QCS8550 (Proxy) @@ -133,13 +133,13 @@ models: os_name: Android manufacturer: Qualcomm chipset: Qcs8550 - timestamp: '2024-05-29T18:59:43.956572Z' + timestamp: '2024-06-08T23:34:25Z' - torchscript_onnx_ort: - inference_time: 7723.0 - throughput: 129.48336138806164 + inference_time: 7762.0 + throughput: 128.83277505797474 estimated_peak_memory_range: - min: 22294528 - max: 22294528 + min: 22315008 + max: 22315008 primary_compute_unit: NPU precision: fp16 layer_info: @@ -147,7 +147,7 @@ models: layers_on_gpu: 0 layers_on_cpu: 0 total_layers: 336 - job_id: jn5q2dm45 + job_id: j1gle02jp job_status: Passed reference_device_info: name: Snapdragon X Elite CRD @@ -156,4 +156,4 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-29T18:59:43.956618Z' + timestamp: '2024-06-08T23:34:32Z' diff --git a/qai_hub_models/requirements-dev.txt b/qai_hub_models/requirements-dev.txt index e1b0706d..bf5cabdd 100644 --- a/qai_hub_models/requirements-dev.txt +++ b/qai_hub_models/requirements-dev.txt @@ -1,5 +1,5 @@ -boto3==1.34.40 -botocore==1.34.40 +boto3==1.34.119 +botocore==1.34.119 coverage==5.3.1 imageio[ffmpeg]==2.31.5 jinja2==3.0.3 @@ -15,4 +15,4 @@ types-PyYAML==6.0.12.12 types-pillow==10.2.0.20240213 types-tabulate==0.9.0.20240106 types-requests==2.31.0.6 -keyrings.envvars; python_version >= '3.9' # used only by CI +keyrings.envvars==1.1.0; python_version >= '3.9' # used only by CI diff --git a/qai_hub_models/utils/aimet/config_loader.py b/qai_hub_models/utils/aimet/config_loader.py index dadc6012..6fbc15b0 100644 --- a/qai_hub_models/utils/aimet/config_loader.py +++ b/qai_hub_models/utils/aimet/config_loader.py @@ -21,6 +21,11 @@ def get_default_aimet_config() -> str: return str(path.resolve()) +def get_default_per_tensor_aimet_config() -> str: + path = Path(__file__).parent / "default_per_tensor_config.json" + return str(path.resolve()) + + def 
get_aimet_config_path(name: str) -> str: path = Path(__file__).parent / f"{name}.json" return str(path.resolve()) diff --git a/qai_hub_models/utils/aimet/default_per_tensor_config.json b/qai_hub_models/utils/aimet/default_per_tensor_config.json new file mode 100644 index 00000000..fda59a9d --- /dev/null +++ b/qai_hub_models/utils/aimet/default_per_tensor_config.json @@ -0,0 +1,88 @@ +{ + "defaults": + { + "ops": + { + "is_output_quantized": "True" + }, + "params": + { + "is_quantized": "True", + "is_symmetric": "True" + }, + "strict_symmetric": "False", + "unsigned_symmetric": "False", + "per_channel_quantization": "False" + }, + + "params": + { + "bias": + { + "is_quantized": "False" + } + }, + + "op_type": + { + "Squeeze": + { + "is_output_quantized": "True" + }, + "Pad": + { + "is_output_quantized": "True" + }, + "Mean": + { + "is_output_quantized": "False" + }, + "Gemm": + { + "per_channel_quantization": "False" + }, + "Sigmoid": + { + "encoding_constraints": + { + "min": 0.0, + "max": 0.99609375 + } + }, + "Softmax": + { + "encoding_constraints": + { + "min": 0.0, + "max": 0.99609375 + } + } + }, + + "supergroups": + [ + { + "op_list": ["Conv", "Relu"] + }, + { + "op_list": ["Conv", "Clip"] + }, + { + "op_list": ["Conv", "BatchNormalization", "Relu"] + }, + { + "op_list": ["Add", "Relu"] + }, + { + "op_list": ["Gemm", "Relu"] + } + ], + + "model_input": + { + "is_input_quantized": "True" + }, + + "model_output": + {} +} diff --git a/qai_hub_models/utils/args.py b/qai_hub_models/utils/args.py index fa2aaefb..184ab87f 100644 --- a/qai_hub_models/utils/args.py +++ b/qai_hub_models/utils/args.py @@ -16,13 +16,15 @@ from typing import Any, List, Mapping, Optional, Set, Type import qai_hub as hub +from qai_hub.client import APIException, UserError from qai_hub_models.models.protocols import ( FromPrecompiledTypeVar, FromPretrainedProtocol, FromPretrainedTypeVar, + HubModelProtocolTypeVar, ) -from qai_hub_models.utils.base_model import BaseModel, InputSpec, TargetRuntime +from qai_hub_models.utils.base_model import BaseModel, TargetRuntime from qai_hub_models.utils.inference import HubModel, compile_model_from_args from qai_hub_models.utils.qai_hub_helpers import can_access_qualcomm_ai_hub @@ -35,7 +37,7 @@ def __init__(self, option_strings, dest, enum_type, **kwargs): self.enum_type = enum_type def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.enum_type[values.upper()]) + setattr(namespace, self.dest, self.enum_type[values.upper().replace("-", "_")]) def get_parser() -> argparse.ArgumentParser: @@ -79,7 +81,7 @@ def add_target_runtime_arg( type=str, action=partial(ParseEnumAction, enum_type=TargetRuntime), # type: ignore default=default, - choices=[rt.name.lower() for rt in available_target_runtimes], + choices=[rt.name.lower().replace("_", "-") for rt in available_target_runtimes], help=help, ) return parser @@ -310,13 +312,14 @@ def demo_model_from_cli_args( inference_options=cli_args.inference_options, ) print(f"Exported asset: {model_id}\n") + else: inference_model = model_from_cli_args(model_cls, cli_args) return inference_model def get_input_spec_kwargs( - model: "BaseModel", args_dict: Mapping[str, Any] + model: Type[HubModelProtocolTypeVar], args_dict: Mapping[str, Any] ) -> Mapping[str, Any]: """ Given a dict with many args, pull out the ones relevant @@ -363,24 +366,33 @@ def get_model_input_spec_parser( def input_spec_from_cli_args( - model: "BaseModel", cli_args: argparse.Namespace -) -> "InputSpec": + model: 
Type[HubModelProtocolTypeVar], cli_args: argparse.Namespace +) -> hub.InputSpecs: """ Create this model's input spec from an argparse namespace. Default behavior is to assume the CLI args have the same names as get_input_spec method args. + Also, fetches shapes if demo is run on-device. """ + + is_on_device = "on_device" in cli_args and cli_args.on_device + if is_on_device and isinstance(model, HubModel): + assert isinstance(model.model.producer, hub.CompileJob) + return model.model.producer.shapes return model.get_input_spec(**get_input_spec_kwargs(model, vars(cli_args))) def get_qcom_chipsets() -> Set[str]: - return set( - [ - attr[len("chipset:") :] - for dev in hub.get_devices() - for attr in dev.attributes - if attr.startswith("chipset:qualcomm") - ] - ) + try: + return set( + [ + attr[len("chipset:") :] + for dev in hub.get_devices() + for attr in dev.attributes + if attr.startswith("chipset:qualcomm") + ] + ) + except (APIException, UserError): + return set([]) def _evaluate_export_common_parser( @@ -388,28 +400,14 @@ def _evaluate_export_common_parser( supports_tflite=True, supports_qnn=True, supports_ort=True, + supports_precompiled_ort=True, default_runtime=TargetRuntime.TFLITE, exporting_compiled_model=False, - default_export_device: str = DEFAULT_EXPORT_DEVICE, ) -> argparse.ArgumentParser: """ Common arguments between export and evaluate scripts. """ parser = get_parser() - parser.add_argument( - "--device", - type=str, - default=default_export_device, - help="Device for which to export.", - ) - parser.add_argument( - "--chipset", - type=str, - default=None, - choices=sorted(get_qcom_chipsets(), reverse=True), - help="If set, will choose a random device with this chipset. " - "Overrides whatever is set in --device.", - ) if not exporting_compiled_model: # Default runtime for compiled model is fixed for given model @@ -420,6 +418,8 @@ def _evaluate_export_common_parser( available_runtimes.append(TargetRuntime.QNN) if supports_ort: available_runtimes.append(TargetRuntime.ORT) + if supports_precompiled_ort: + available_runtimes.append(TargetRuntime.PRECOMPILED_ORT) default_runtime = _get_default_runtime(available_runtimes) add_target_runtime_arg( @@ -460,6 +460,7 @@ def export_parser( supports_tflite: bool = True, supports_qnn: bool = True, supports_ort: bool = True, + supports_precompiled_ort: bool = True, default_runtime: TargetRuntime = TargetRuntime.TFLITE, exporting_compiled_model: bool = False, default_export_device: str = DEFAULT_EXPORT_DEVICE, @@ -479,6 +480,9 @@ def export_parser( supports_ort: Whether ORT export is supported. Default=True. + supports_precompiled_ort: + Whether precompiled ORT (with QNN context binary) export is supported. + Default=True. default_runtime: Which runtime to use as default if not specified in cli args. exporting_compiled_model: True when exporting compiled model. @@ -495,9 +499,23 @@ def export_parser( supports_tflite=supports_tflite, supports_qnn=supports_qnn, supports_ort=supports_ort, + supports_precompiled_ort=supports_precompiled_ort, default_runtime=default_runtime, exporting_compiled_model=exporting_compiled_model, - default_export_device=default_export_device, + ) + parser.add_argument( + "--device", + type=str, + default=default_export_device, + help="Device for which to export.", + ) + parser.add_argument( + "--chipset", + type=str, + default=None, + choices=sorted(get_qcom_chipsets(), reverse=True), + help="If set, will choose a random device with this chipset. 
" + "Overrides whatever is set in --device.", ) parser.add_argument( "--skip-profiling", @@ -579,6 +597,13 @@ def evaluate_parser( supports_ort=supports_ort, default_runtime=default_runtime, ) + parser.add_argument( + "--chipset", + type=str, + default="qualcomm-snapdragon-8gen2", + choices=sorted(get_qcom_chipsets(), reverse=True), + help="Which chipset to use to run evaluation.", + ) parser.add_argument( "--split-size", type=int, diff --git a/qai_hub_models/utils/asset_loaders.py b/qai_hub_models/utils/asset_loaders.py index 5f8917d0..adf4b2bc 100644 --- a/qai_hub_models/utils/asset_loaders.py +++ b/qai_hub_models/utils/asset_loaders.py @@ -72,6 +72,20 @@ def set_log_level(log_level: int): logger.setLevel(old_level) +@contextmanager +def tmp_os_env(env_values: Dict[str, str]): + """ + Creates a context where the os environment variables are replaced with + the given values. After exiting the context, the previous env is restored. + """ + previous_env = os.environ.copy() + try: + os.environ.update(env_values) + yield + finally: + os.environ = previous_env # type: ignore + + def _query_yes_no(question, default="yes"): """ Ask a yes/no question and return their answer. @@ -364,6 +378,7 @@ def __init__( dataset_asset_folder: str, local_store_path: str, qaihm_repo: str, + labels_path: str, example_use: str, huggingface_path: str, repo_url: str, @@ -378,6 +393,7 @@ def __init__( self.model_asset_folder = model_asset_folder self.dataset_asset_folder = dataset_asset_folder self.qaihm_repo = qaihm_repo + self.labels_path = labels_path self.example_use = example_use self.huggingface_path = huggingface_path self.repo_url = repo_url @@ -402,12 +418,7 @@ def get_web_asset_url(self, model_id: str, type: QAIHM_WEB_ASSET): return ( f"{self.asset_url.rstrip('/')}/" + ( - Path( - ModelZooAssetConfig._replace_path_keywords( - self.web_asset_folder.lstrip("/"), model_id=model_id - ) - ) - / file + Path(self.web_asset_folder.lstrip("/").format(model_id=model_id)) / file ).as_posix() ) @@ -432,8 +443,8 @@ def get_relative_model_asset_path( self, model_id: str, version: Union[int, str], file_name: Path | str ) -> Path: return Path( - ModelZooAssetConfig._replace_path_keywords( - self.model_asset_folder.lstrip("/"), model_id=model_id, version=version + self.model_asset_folder.lstrip("/").format( + model_id=model_id, version=version ) ) / Path(file_name) @@ -441,10 +452,8 @@ def get_relative_dataset_asset_path( self, dataset_id: str, version: Union[int, str], file_name: Path | str ) -> Path: return Path( - ModelZooAssetConfig._replace_path_keywords( - self.dataset_asset_folder.lstrip("/"), - dataset_id=dataset_id, - version=version, + self.dataset_asset_folder.lstrip("/").format( + dataset_id=dataset_id, version=version ) ) / Path(file_name) @@ -465,48 +474,25 @@ def get_dataset_asset_url( self.get_relative_dataset_asset_path(dataset_id, version, file_name) ) + def get_labels_file_path(self, labels_file: str) -> str: + return self.labels_path.lstrip("/").format(labels_file=labels_file) + def get_qaihm_repo(self, model_id: str, relative=True) -> Path | str: - relative_path = Path( - ModelZooAssetConfig._replace_path_keywords( - self.qaihm_repo.lstrip("/"), model_id=model_id - ) - ) + relative_path = Path(self.qaihm_repo.lstrip("/").format(model_id=model_id)) if not relative: return f"{self.repo_url.rstrip('/')}/{relative_path.as_posix()}" return relative_path def get_website_url(self, model_id: str, relative=False) -> Path | str: relative_path = Path( - ModelZooAssetConfig._replace_path_keywords( - 
self.models_website_relative_path.lstrip("/"), model_id=model_id
-            )
+            self.models_website_relative_path.lstrip("/").format(model_id=model_id)
         )
         if not relative:
             return f"{self.models_website_url.rstrip('/')}/{relative_path.as_posix()}"
         return relative_path

     def get_example_use(self, model_id: str) -> str:
-        return ModelZooAssetConfig._replace_path_keywords(
-            self.example_use.lstrip("/"), model_id=model_id
-        )
-
-    ###
-    # Helpers
-    ###
-    @staticmethod
-    def _replace_path_keywords(
-        path: str,
-        model_id: Optional[str] = None,
-        dataset_id: Optional[str] = None,
-        version: Optional[Union[int, str]] = None,
-    ):
-        if model_id:
-            path = path.replace("{model_id}", model_id)
-        if dataset_id:
-            path = path.replace("{dataset_id}", dataset_id)
-        if version:
-            path = path.replace("{version}", str(version))
-        return path
+        return self.example_use.lstrip("/").format(model_id=model_id)

     ###
     # Load from CFG
@@ -531,6 +517,7 @@ def from_cfg(
             asset_cfg["dataset_asset_folder"],
             local_store_path,
             asset_cfg["qaihm_repo"],
+            asset_cfg["labels_path"],
             asset_cfg["example_use"],
             asset_cfg["huggingface_path"],
             asset_cfg["repo_url"],
@@ -548,6 +535,7 @@ def from_cfg(
                 "animated_web_banner_filename": str,
                 "model_asset_folder": str,
                 "qaihm_repo": str,
+                "labels_path": str,
                 "example_use": str,
                 "huggingface_path": str,
                 "repo_url": str,
diff --git a/qai_hub_models/utils/base_model.py b/qai_hub_models/utils/base_model.py
index b2a55154..377fc357 100644
--- a/qai_hub_models/utils/base_model.py
+++ b/qai_hub_models/utils/base_model.py
@@ -178,6 +178,8 @@ def get_hub_compile_options(
             target_runtime_flag = "onnx"
         elif target_runtime == TargetRuntime.TFLITE:
             target_runtime_flag = "tflite"
+        elif target_runtime == TargetRuntime.PRECOMPILED_ORT:
+            target_runtime_flag = "compiled_qnn_onnx"
         else:
             raise NotImplementedError()

diff --git a/qai_hub_models/utils/config_loaders.py b/qai_hub_models/utils/config_loaders.py
index 3289b900..0c36432f 100644
--- a/qai_hub_models/utils/config_loaders.py
+++ b/qai_hub_models/utils/config_loaders.py
@@ -227,6 +227,10 @@ def map_to_hf_pipeline_tag(self):
 QNN_PATH = "torchscript_onnx_qnn"


+def bytes_to_mb(num_bytes: int) -> int:
+    return round(num_bytes / (1 << 20))
+
+
 class QAIHMModelPerf:
     """Class to read the perf.yaml and parse it for displaying it on HuggingFace."""

@@ -301,12 +305,8 @@ def get_row(self, skip, summary_list, initial_row, model_type, has_assets=True):
         for summary, name in zip(summary_list, names):
             inf_time = summary["inference_time"]
             inference_time = f"{inf_time / 1000} ms"
-            mem_min = round(
-                summary["estimated_peak_memory_range"]["min"] / 1024 / 1024
-            )
-            mem_max = round(
-                summary["estimated_peak_memory_range"]["max"] / 1024 / 1024
-            )
+            mem_min = bytes_to_mb(summary["estimated_peak_memory_range"]["min"])
+            mem_max = bytes_to_mb(summary["estimated_peak_memory_range"]["max"])
             peak_memory_range = f"{mem_min} - {mem_max} MB"
             if model_type == "tflite":
                 self.tflite_inference_time = inference_time
@@ -501,6 +501,7 @@ def __init__(
         inference_metrics: str,
         additional_readme_section: str,
         skip_example_usage: bool,
+        eval_datasets: List[str],
     ) -> None:
         self.is_aimet = is_aimet
         self.has_on_target_demo = has_on_target_demo
@@ -523,6 +524,7 @@ def __init__(
         self.additional_readme_section = additional_readme_section
         self.skip_export = skip_export
         self.skip_example_usage = skip_example_usage
+        self.eval_datasets = eval_datasets

     def validate(self) -> Tuple[bool, Optional[str]]:
         """Returns false with a reason if the info spec for this model is not valid."""
@@ -563,6 +565,7 @@ def
from_yaml( code_gen_config["additional_readme_section"], code_gen_config["skip_export"], code_gen_config["skip_example_usage"], + code_gen_config["eval_datasets"], ) # Schema for code-gen.yaml @@ -591,6 +594,7 @@ def from_yaml( OptionalSchema("additional_readme_section", default=""): str, OptionalSchema("skip_export", default=False): bool, OptionalSchema("skip_example_usage", default=False): bool, + OptionalSchema("eval_datasets", default=[]): list, } ) ) @@ -634,6 +638,7 @@ def __init__( license_type: str, deploy_license_type: str, dataset: List[str], + labels_file: str | None, technical_details: Dict[str, str], ) -> None: self.name = name @@ -652,6 +657,7 @@ def __init__( self.license_type = license_type self.deploy_license_type = deploy_license_type self.dataset = dataset + self.labels_file = labels_file self.source_repo = source_repo self.applicable_scenarios = applicable_scenarios self.related_models = related_models @@ -724,6 +730,11 @@ def validate(self) -> Tuple[bool, Optional[str]]: "`status_reason` in info.yaml should not be set for public models.", ) + # Labels file + if self.labels_file is not None: + if not os.path.exists(ASSET_CONFIG.get_labels_file_path(self.labels_file)): + return False, f"Invalid labels file: {self.labels_file}" + # Required assets exist if self.status == MODEL_STATUS.PUBLIC: if not os.path.exists(self.get_package_path() / "info.yaml"): @@ -775,6 +786,11 @@ def get_demo_path(self): ASSET_CONFIG.get_qaihm_repo(self.id, relative=False), "demo.py" ) + def get_labels_file_path(self): + if self.labels_file is None: + return None + return ASSET_CONFIG.get_labels_file_path(self.labels_file) + def get_info_yaml_path(self, root: Path = QAIHM_PACKAGE_ROOT): return self.get_package_path(root) / "info.yaml" @@ -861,6 +877,7 @@ def from_yaml( info_yaml["license_type"], info_yaml["deploy_license_type"], info_yaml["dataset"], + info_yaml.get("labels_file", None), info_yaml["technical_details"], ) @@ -891,6 +908,7 @@ def from_yaml( "license_type": str, "deploy_license_type": str, "dataset": list, + OptionalSchema("labels_file", default=None): str, } ) ) diff --git a/qai_hub_models/utils/evaluate.py b/qai_hub_models/utils/evaluate.py index 52b8627b..33d8774b 100644 --- a/qai_hub_models/utils/evaluate.py +++ b/qai_hub_models/utils/evaluate.py @@ -303,13 +303,13 @@ def evaluate_on_dataset( compiled_model: hub.Model, torch_model: BaseModel, hub_device: hub.Device, - dataset_name: str = "imagenette", - split_size: int = 2500, - num_samples: int = 100, + dataset_name: str, + split_size: int, + num_samples: int, seed: int = 42, profile_options: str = "", use_cache: bool = False, -) -> None: +) -> Tuple[str, str]: """ Evaluate model accuracy on a dataset both on device and with PyTorch. @@ -327,6 +327,9 @@ def evaluate_on_dataset( use_cache: If set, will upload the full dataset to hub and store a local copy. This prevents re-uploading data to hub for each evaluation, with the tradeoff of increased initial overhead. + + Returns: + Tuple of (torch accuracy, on device accuracy) both as formatted strings. """ assert isinstance(torch_model, EvalModelProtocol), "Model must have an evaluator." 
_validate_inputs(num_samples)
@@ -383,7 +386,10 @@ def evaluate_on_dataset(
             f"Cumulative on device accuracy on batch {i + 1}/{num_batches}: "
             f"{on_device_evaluator.formatted_accuracy()}"
         )
+    torch_accuracy = torch_evaluator.formatted_accuracy()
+    on_device_accuracy = on_device_evaluator.formatted_accuracy()

     print("\nFinal accuracy:")
-    print(f"torch: {torch_evaluator.formatted_accuracy()}")
-    print(f"on-device: {on_device_evaluator.formatted_accuracy()}")
+    print(f"torch: {torch_accuracy}")
+    print(f"on-device: {on_device_accuracy}")
+    return (torch_accuracy, on_device_accuracy)
diff --git a/qai_hub_models/utils/inference.py b/qai_hub_models/utils/inference.py
index e46e4583..6c48856b 100644
--- a/qai_hub_models/utils/inference.py
+++ b/qai_hub_models/utils/inference.py
@@ -212,7 +212,9 @@ def compile_zoo_model_to_hub(


 def compile_model_from_args(
-    model_id: str, cli_args: argparse.Namespace, model_kwargs: Mapping[str, Any]
+    model_id: str,
+    cli_args: argparse.Namespace,
+    model_kwargs: Mapping[str, Any],
 ) -> hub.Model:
     export_file = f"qai_hub_models.models.{model_id}.export"
     export_module = import_module(export_file)
@@ -228,6 +230,7 @@ def compile_model_from_args(
         )
     export_output = export_module.export_model(
         device=cli_args.device,
+        chipset=cli_args.chipset,
         skip_profiling=True,
         skip_inferencing=True,
         skip_downloading=True,
diff --git a/qai_hub_models/utils/printing.py b/qai_hub_models/utils/printing.py
index 95aa9bdc..1074a3d4 100644
--- a/qai_hub_models/utils/printing.py
+++ b/qai_hub_models/utils/printing.py
@@ -14,7 +14,7 @@
 from qai_hub_models.utils.base_model import TargetRuntime
 from qai_hub_models.utils.compare import METRICS_FUNCTIONS, generate_comparison_metrics
-from qai_hub_models.utils.config_loaders import QAIHMModelPerf
+from qai_hub_models.utils.config_loaders import QAIHMModelPerf, bytes_to_mb
 from qai_hub_models.utils.qnn_helpers import is_qnn_hub_model

 _INFO_DASH = "-" * 60
@@ -122,7 +122,7 @@ def print_profile_metrics(
     details: QAIHMModelPerf.ModelRuntimePerformanceDetails,
 ):
     inf_time = details.inference_time_ms
-    peak_memory_mb = f"[{round(details.peak_memory_bytes[0] / 1e6)}, {round(details.peak_memory_bytes[1] / 1e6)}]"
+    peak_memory_mb = f"[{bytes_to_mb(details.peak_memory_bytes[0])}, {bytes_to_mb(details.peak_memory_bytes[1])}]"
     num_ops = sum(details.compute_unit_counts.values())
     compute_units = [
         f"{unit} ({num_ops} ops)"
diff --git a/qai_hub_models/utils/quantization_aimet.py b/qai_hub_models/utils/quantization_aimet.py
index df55cbb0..22a6a29c 100644
--- a/qai_hub_models/utils/quantization_aimet.py
+++ b/qai_hub_models/utils/quantization_aimet.py
@@ -65,6 +65,7 @@ def _should_tie_observers(op: torch.nn.Module) -> bool:
         nn.Upsample,
         aimet_ops.Concat,
         aimet_ops.Interpolate,
+        aimet_ops.MaxPool2d,
     ]
     for op_type in op_types_to_tie:
         if isinstance(wrapped_op, op_type):
@@ -458,43 +459,17 @@ def get_hub_compile_options(
         other_compile_options: str = "",
         device: Optional[Device] = None,
     ) -> str:
-        compile_options = super().get_hub_compile_options(  # type: ignore
-            target_runtime, other_compile_options, device
-        )
-        if target_runtime != TargetRuntime.ORT:
-            # TODO(#10896): Restore quantize_io flag when targeting ORT
-            compile_options = (
-                compile_options + " --quantize_full_type int8 --quantize_io"
+        quantization_flags = " --quantize_io"
+        if target_runtime not in [TargetRuntime.ORT, TargetRuntime.PRECOMPILED_ORT]:
+            quantization_flags += " --quantize_full_type int8"
+        return (
+            super().get_hub_compile_options(  # type: ignore
+                target_runtime, other_compile_options, device
             )
-        return compile_options
+            + quantization_flags
+        )

     def preferred_hub_source_model_format(
         self, target_runtime: TargetRuntime
     ) -> SourceModelFormat:
         return SourceModelFormat.ONNX
-
-
-def tie_aimet_observer_groups(groups: List[List[Any]]):
-    """
-    Unless you're doing something very customized, you likely want to use
-    the `tie_observers` method instead.
-
-    This defines groups of ops that all should use the same output
-    quantizer observer. The input groups is a list of lists, where the
-    inner lists contain op references that should all use the same output
-    quantizer. Each op should have an `output_quantizers` member.
-
-    Example:
-
-    groups = [
-        [
-            sim.model.net.maxpool2,
-            sim.model.net.Mixed_5b.module_avg_pool2d,
-        ],
-    ]
-    _tie_aimet_observer_groups(groups)
-    """
-    for group in groups:
-        output_quantizer = group[0].output_quantizers[0]
-        for op in group[1:]:
-            op.output_quantizers[0] = output_quantizer
diff --git a/qai_hub_models/utils/scorecard/common.py b/qai_hub_models/utils/scorecard/common.py
index c50b66d9..a8395ec0 100644
--- a/qai_hub_models/utils/scorecard/common.py
+++ b/qai_hub_models/utils/scorecard/common.py
@@ -139,11 +139,6 @@ def get_test_devices(
         return [x for x in devices if x.enabled()] if only_enabled else devices

     def get_compile_options(self, aimet_model=False) -> str:
-        if aimet_model and self.get_runtime() == TargetRuntime.ORT:
-            # TODO(#10896): Restore quantize_io flag to
-            # the default set of flags used to target ORT.
-            # This flag can be removed when that happens.
-            return "--quantize_io"
         return ""

     def get_job_cache_name(
@@ -251,7 +246,7 @@ def get_test_devices(
                 ScorecardDevice.cs_8_gen_3,
                 ScorecardDevice.cs_x_elite,
                 ScorecardDevice.cs_8550,
-            ] + ([ScorecardDevice.cs_6490] if aimet_model else [])
+            ]
         elif self == ScorecardProfilePath.ORT_DML_GPU:
             devices = [ScorecardDevice.cs_x_elite]
         else:
diff --git a/qai_hub_models/utils/scorecard/job_summary.py b/qai_hub_models/utils/scorecard/job_summary.py
index 77c2938a..f9d0b1cd 100644
--- a/qai_hub_models/utils/scorecard/job_summary.py
+++ b/qai_hub_models/utils/scorecard/job_summary.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------
+import datetime
 from dataclasses import dataclass
 from functools import cached_property
 from typing import Any, Dict, List, Optional, Type, Union, cast
@@ -105,6 +106,12 @@ def quantized(self) -> str:
             else "No"
         )

+    @cached_property
+    def date(self) -> Optional[datetime.datetime]:
+        if self.job is None:
+            return None
+        return self.job.date
+

 @dataclass
 class CompileJobSummary(JobSummary):
diff --git a/qai_hub_models/utils/scorecard/model_card.py b/qai_hub_models/utils/scorecard/model_card.py
index c812eda1..15849492 100644
--- a/qai_hub_models/utils/scorecard/model_card.py
+++ b/qai_hub_models/utils/scorecard/model_card.py
@@ -4,7 +4,6 @@
 # ---------------------------------------------------------------------
 from __future__ import annotations

-import datetime
 import functools
 import multiprocessing
 import pprint
@@ -93,7 +92,7 @@ def supported_chipsets_santized(chips) -> List[str]:
 __CHIP_SUPPORTED_DEVICES_CACHE: Dict[str, List[str]] = {}


-def supported_devices(chips) -> List[str]:
+def get_supported_devices(chips) -> List[str]:
     """Return all the supported devices given the chipset being used."""
     supported_devices = set(
         [
@@ -110,7 +109,9 @@ def supported_devices(chips) -> List[str]:
         supported_devices_for_chip = __CHIP_SUPPORTED_DEVICES_CACHE.get(chip, list())
         if not supported_devices_for_chip:
             supported_devices_for_chip = [
-                device.name for device in hub.get_devices(attributes=f"chipset:{chip}")
+                device.name
+                for device in hub.get_devices(attributes=f"chipset:{chip}")
+                if "(Family)" not in device.name
             ]
             __CHIP_SUPPORTED_DEVICES_CACHE[chip] = supported_devices_for_chip
         supported_devices.update(supported_devices_for_chip)
@@ -175,6 +176,7 @@ def get_perf_card(
         exclude_paths: Iterable[ScorecardProfilePath] = [],
     ) -> Dict[str, str | Dict[str, str]]:
         perf_card: Dict[str, str | Dict[str, str]] = {}
+        max_date = None
         for path, run in self.run_per_path.items():
             if (
                 not run.skipped  # Skipped runs are not included
@@ -185,8 +187,17 @@ def get_perf_card(
                 )  # exclude failed jobs if requested
             ):
                 perf_card[path.long_name] = run.performance_metrics
+                if max_date is None:
+                    max_date = run.date
+                elif run.date is not None:
+                    max_date = max(max_date, run.date)
+        if not perf_card:
+            return {}
         perf_card["reference_device_info"] = get_reference_device_info(self.device)
-        perf_card["timestamp"] = datetime.datetime.utcnow().isoformat() + "Z"
+        # The timestamp for the device is the latest creation time among the runs
+        # If max_date is still None for some reason, something went wrong
+        assert max_date is not None
+        perf_card["timestamp"] = max_date.isoformat() + "Z"
         return perf_card

     def __repr__(self) -> str:
@@ -225,7 +236,10 @@ def get_perf_card(
     ) -> List[Dict[str, Union[str, Dict[str, str]]]]:
         perf_card = []
         for summary in self.runs_per_device.values():
-            perf_card.append(summary.get_perf_card(include_failed_jobs, exclude_paths))
+            device_summary = summary.get_perf_card(include_failed_jobs, exclude_paths)
+            # If device had no runs, omit it from the card
+            if device_summary:
+                perf_card.append(device_summary)
         return perf_card

     def __repr__(self):
@@ -318,7 +332,7 @@ def get_perf_card(
         chips = self.get_chipsets()
         perf_card["aggregated"] = dict(
             supported_oses=supported_oses(),
-            supported_devices=supported_devices(chips),
+            supported_devices=get_supported_devices(chips),
             supported_chipsets=supported_chipsets_santized(chips),
         )
diff --git a/scripts/build_and_test.py b/scripts/build_and_test.py
index 471bae6c..a5fa7643 100755 --- a/scripts/build_and_test.py +++ b/scripts/build_and_test.py @@ -448,7 +448,6 @@ def test_profile_all_models( def test_all_models_long( self, plan: Plan, step_id: str = "test_all_models_long" ) -> str: - # Includes export tests, and creates a fresh environment for each model. all_models = get_all_models() return plan.add_step( step_id, @@ -458,7 +457,6 @@ def test_all_models_long( all_models, self.venv_path, venv_for_each_model=False, - skip_standard_unit_test=True, use_shared_cache=True, test_trace=False, ), diff --git a/scripts/examples/quantize_imagenet_classifier.py b/scripts/examples/quantize_imagenet_classifier.py index 907e877b..c4abb91f 100644 --- a/scripts/examples/quantize_imagenet_classifier.py +++ b/scripts/examples/quantize_imagenet_classifier.py @@ -22,6 +22,7 @@ ) from qai_hub_models.models.googlenet_quantized.model import GoogLeNetQuantizable from qai_hub_models.models.inception_v3_quantized.model import InceptionNetV3Quantizable +from qai_hub_models.models.midas_quantized.model import MidasQuantizable from qai_hub_models.models.mobilenet_v2_quantized.model import MobileNetV2Quantizable from qai_hub_models.models.mobilenet_v3_large_quantized.model import ( MobileNetV3LargeQuantizable, @@ -37,9 +38,10 @@ from qai_hub_models.models.wideresnet50_quantized.model import WideResNet50Quantizable from qai_hub_models.utils.quantization_aimet import AIMETQuantizableMixin -CLASSIFIERS = { +MODELS = { "googlenet": GoogLeNetQuantizable, "inception_v3": InceptionNetV3Quantizable, + "midas": MidasQuantizable, "mobilenet_v2": MobileNetV2Quantizable, "mobilenet_v3_large": MobileNetV3LargeQuantizable, "regnet": RegNetQuantizable, @@ -55,6 +57,10 @@ "convnext_tiny_w8a16": ConvNextTinyW8A16Quantizable, } +# These models are quantized by imagenet data, but are not classifiers +# Don't try to compute accuracy for these models +NON_CLASSIFIERS = ["midas"] + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -82,7 +88,7 @@ "--model", "-m", type=str, - choices=list(CLASSIFIERS.keys()), + choices=list(MODELS.keys()), required=False, help="Name of the model to quantize.", ) @@ -101,17 +107,18 @@ ) args = parser.parse_args() if args.all: - ImageNetClassifier_classes = CLASSIFIERS.values() + ImageNetClassifier_classes = MODELS.values() else: if not hasattr(args, "model"): raise ValueError( "Specify a model via --model or all models via --all" ) - ImageNetClassifier_classes = [CLASSIFIERS[args.model]] + ImageNetClassifier_classes = [MODELS[args.model]] dataset = ImagenetteDataset() torch.manual_seed(args.seed) dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) + skip_accuracy = args.model in NON_CLASSIFIERS for ImageNetClassifier_cls in ImageNetClassifier_classes: model: AIMETQuantizableMixin = ImageNetClassifier_cls.from_pretrained( @@ -119,24 +126,23 @@ ) print(f"\nQuantizing {ImageNetClassifier_cls.__name__}") - evaluator = model.get_evaluator() - - evaluator.reset() - evaluator.add_from_dataset(model, dataloader, args.num_iter) - accuracy_fp32 = evaluator.get_accuracy_score() + if not skip_accuracy: + evaluator = model.get_evaluator() + evaluator.reset() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_fp32 = evaluator.get_accuracy_score() + print(f"FP32 Accuracy: {accuracy_fp32 * 100:.3g}%") model.quantize(dataloader, args.num_iter, data_has_gt=True) - evaluator.reset() - evaluator.add_from_dataset(model, dataloader, args.num_iter) - accuracy_int8 = evaluator.get_accuracy_score() + 
if not skip_accuracy: + evaluator = model.get_evaluator() + evaluator.add_from_dataset(model, dataloader, args.num_iter) + accuracy_int8 = evaluator.get_accuracy_score() - print(f"FP32 Accuracy: {accuracy_fp32 * 100:.3g}%") - print(f"INT8 Accuracy: {accuracy_int8 * 100:.3g}%") + print(f"INT8 Accuracy: {accuracy_int8 * 100:.3g}%") output_path = args.output_dir or str(Path() / "build") - output_name = ( - args.output_name or f"{ImageNetClassifier_cls.__name__}_quantized_encodings" - ) + output_name = args.output_name or f"{args.model}_quantized_encodings" model.quant_sim.save_encodings_to_json(output_path, output_name) - print(f"Wrote {output_path}/{output_name}\n") + print(f"Wrote {output_path}/{output_name}.json\n") diff --git a/scripts/examples/quantize_hrnet.py b/scripts/examples/quantize_pose_detector.py similarity index 74% rename from scripts/examples/quantize_hrnet.py rename to scripts/examples/quantize_pose_detector.py index 835beb6c..01fd9745 100644 --- a/scripts/examples/quantize_hrnet.py +++ b/scripts/examples/quantize_pose_detector.py @@ -14,11 +14,20 @@ from qai_hub_models.datasets.coco import CocoDataset from qai_hub_models.models.hrnet_pose.app import HRNetPoseApp +from qai_hub_models.models.hrnet_pose.model import HRNetPose from qai_hub_models.models.hrnet_pose_quantized.model import HRNetPoseQuantizable +from qai_hub_models.models.posenet_mobilenet_quantized.model import ( + PosenetMobilenetQuantizable, +) + +MODELS = { + "hrnet_pose": HRNetPoseQuantizable, + "posenet_mobilenet": PosenetMobilenetQuantizable, +} # Create custom data loader for this model that does the preprocessing -class HRNetCocoDataset(CocoDataset): +class PoseCocoDataset(CocoDataset): def __init__(self, preprocess_lambda, target_image_size=640): super().__init__(target_image_size) self.preprocess_lambda = preprocess_lambda @@ -41,6 +50,13 @@ def __getitem__(self, item): default=None, help="Directory where encodings should be stored. 
Defaults to ./build.", ) + parser.add_argument( + "--model", + type=str, + choices=MODELS.keys(), + required=True, + help="Name of the model to quantize.", + ) parser.add_argument( "--output-name", type=str, @@ -56,11 +72,13 @@ def __getitem__(self, item): args = parser.parse_args() torch.manual_seed(args.seed) - model = HRNetPoseQuantizable.from_pretrained(aimet_encodings=None) - app = HRNetPoseApp(model) + model_cls = MODELS[args.model] + model = model_cls.from_pretrained(aimet_encodings=None) # Initialize Data Loader - dataset = HRNetCocoDataset(app.preprocess_input) + dataset = PoseCocoDataset( + HRNetPoseApp(HRNetPose.from_pretrained()).preprocess_input + ) # TODO(10491) Add metrics computation here @@ -69,5 +87,5 @@ def __getitem__(self, item): # Export encodings output_path = args.output_dir or str(Path() / "build") - output_name = args.output_name or "hrnet_pose_quantized_encodings" + output_name = args.output_name or f"{args.model}_quantized_encodings" model.quant_sim.save_encodings_to_json(output_path, output_name) diff --git a/scripts/examples/quantize_superresolution.py b/scripts/examples/quantize_superresolution.py index 6807c7d2..055ee17e 100644 --- a/scripts/examples/quantize_superresolution.py +++ b/scripts/examples/quantize_superresolution.py @@ -23,6 +23,7 @@ from qai_hub_models.models.quicksrnetsmall_quantized.model import ( QuickSRNetSmallQuantizable, ) +from qai_hub_models.models.sesr_m5_quantized.model import SESR_M5Quantizable from qai_hub_models.models.xlsr_quantized.model import XLSRQuantizable from qai_hub_models.utils.quantization_aimet import ( # isort: skip @@ -34,6 +35,7 @@ "quicksrnetsmall": QuickSRNetSmallQuantizable, "quicksrnetmedium": QuickSRNetMediumQuantizable, "quicksrnetlarge": QuickSRNetLargeQuantizable, + "sesr_m5": SESR_M5Quantizable, } @@ -77,11 +79,13 @@ parser.add_argument( "--scale-factor", type=int, - default=4, + default=3, help="Scaling factor of the model.", ) args = parser.parse_args() - model = MODELS[args.model].from_pretrained(aimet_encodings=None) + model = MODELS[args.model].from_pretrained( + aimet_encodings=None, scale_factor=args.scale_factor + ) # Load dataset dataset = BSD300Dataset(scaling_factor=args.scale_factor) diff --git a/scripts/tasks/changes.py b/scripts/tasks/changes.py index bebe6068..f4eb73fb 100644 --- a/scripts/tasks/changes.py +++ b/scripts/tasks/changes.py @@ -260,4 +260,14 @@ def get_all_models() -> Iterable[str]: for model_name in os.listdir(PY_PACKAGE_MODELS_ROOT): if os.path.exists(os.path.join(PY_PACKAGE_MODELS_ROOT, model_name, "model.py")): model_names.add(model_name) + + # Select a subset of models based on user input + allowed_models = os.environ.get("QAIHM_TEST_MODELS", None) + if allowed_models and allowed_models.upper() != "ALL": + allowed_models = allowed_models.split(",") + for model in allowed_models: + if model not in model_names: + raise ValueError(f"Unknown model selected: {model}") + model_names = allowed_models + return model_names diff --git a/scripts/tasks/util.py b/scripts/tasks/util.py index 5f6bc438..9625ff3b 100644 --- a/scripts/tasks/util.py +++ b/scripts/tasks/util.py @@ -36,9 +36,7 @@ def new_cd(x): def can_support_aimet(platform: str = sys.platform) -> bool: - return ( - platform == "linux" or platform == "linux2" - ) and sys.version_info.minor == 8 # python 3.8 only + return platform == "linux" or platform == "linux2" def model_needs_aimet(model_name: str) -> bool: